Natural Language Processing  0.1.0
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Pages
converter.h
1 
8 #ifndef CONVERTER_H
9 #define CONVERTER_H
10 #include <string>
11 #include <list>
12 #include <QtSql>
13 #include <sstream>
14 
15 #include "../Tokenizer/stokenize.h"
16 #include "../../CONFIG/config.h"
17 #include "word.h"
18 
19 using namespace std;
20 
21 namespace NLP
22 {
23 
24 
28  class Converter
29  {
30  private:
31  string mSentence; // original sentence
32  list<Token> mTokens; // store all essential tokens to be processed
33 
34  // Helper function
35  void extractTokens();
36  void convertAllToLower();
37  set<WordType> getWordTypes(string wordname, size_t recurDepth = 0);
38  set<string> getUnqStringWT(string &rawTypesCollections);
39 
40  public:
41  static QSqlDatabase mDb;
42  Converter ();
43  Converter(const string& sentence);
44 
45  void setString(const string& newstr);
46  vector<Word> getWords();
47  ~Converter();
48  };
49 
// Out-of-class definition of the static connection member shared by all
// Converter instances. The initializer registers a connection named
// "ENG_DICT" that uses the "QSQLITE" driver; because it is a namespace-scope
// initializer, it runs during static initialization.
// NOTE(review): this is a non-inline definition placed in a header; including
// converter.h from more than one translation unit would presumably cause
// duplicate-definition link errors -- confirm the header is included only once.
50  QSqlDatabase Converter::mDb = QSqlDatabase::addDatabase("QSQLITE", "ENG_DICT");
51 
59  Converter::Converter()
60  {
61  mDb.setDatabaseName(DB_PATH); // database in main project
62  if( !mDb.open() ) {
63  qDebug() << mDb.lastError();
64  qFatal( "Failed to connect to database." );
65  throw std::invalid_argument("Error: Invalid database");
66  }
67 // else { qDebug() << "Debug: Database opened." << endl; }
68  }
69 
76  Converter::Converter(const string &sentence) : Converter()
77  {
78  mSentence = sentence;
79  extractTokens(); // extract tokens
80  }
81 
82 
91  void Converter::extractTokens()
92  {
93  STokenize mySTokenize(mSentence);
94  while (mySTokenize.More()) {
95  mTokens.push_back(mySTokenize.nextToken());
96  }
97  convertAllToLower ();
98  }
99 
105  void Converter::convertAllToLower()
106  {
107  for(Token& tok : mTokens)
108  tok.alltolower ();
109  }
110 
116  void Converter::setString(const string &newstr)
117  {
118  mTokens.clear ();
119  mSentence = newstr;
120  extractTokens ();
121  }
122 
132  set<WordType> Converter::getWordTypes(string wordname, size_t recurDepth)
133  {
134 
136  QSqlQuery mLiteQr(this->mDb);
137  string qrStr = "SELECT DISTINCT wordtype FROM entries WHERE word = '" + wordname + "'";
138  string rawTypesCollections;
139 
141  mLiteQr.prepare(qrStr.c_str());
142  if( !mLiteQr.exec() ) {
143  qDebug() << mLiteQr.lastError();
144  throw std::invalid_argument("Invalid query.");
145  }
146 
148  while( mLiteQr.next() ) {
149  rawTypesCollections += string(" ");
150  rawTypesCollections += mLiteQr.value(0).toString().toStdString();
151  }
152 
159  if (rawTypesCollections.empty() && wordname.length() > 2 && recurDepth < 3) {
160  return getWordTypes(wordname.substr(0, wordname.length() - 1), recurDepth + 1);
161  } else if (rawTypesCollections.empty () && (recurDepth > 2 || wordname.length () <= 2)){
162  set<WordType> nameEntity;
163  nameEntity.insert (WordType::noun);
164  return nameEntity;
165  }
166 
168  set<string> uniqueWT = getUnqStringWT (rawTypesCollections);
169 
171  set<WordType> WordTypes;
172  for(string WT : uniqueWT)
173  WordTypes.insert(WordTypeMap[WT]);
174  return WordTypes;
175  }
176 
183  set<string> Converter::getUnqStringWT(string& rawTypesCollections)
184  {
186  rawTypesCollections.erase(std::remove(rawTypesCollections.begin()
187  , rawTypesCollections.end(), ',')
188  , rawTypesCollections.end());
189  rawTypesCollections.erase(std::remove(rawTypesCollections.begin(),
190  rawTypesCollections.end(), '&')
191  , rawTypesCollections.end());
192  string buf; // Have a buffer string
193  stringstream ss(rawTypesCollections); // Insert the string into a stream
194 
195  set<string> uniqueWT;
196  while (ss >> buf)
197  uniqueWT.insert(buf);
198  return uniqueWT;
199  }
200 
205  vector<Word> Converter::getWords()
206  {
207  // NOTE : Move database declaration here
208 
209  vector<Word> WordList;
210  set <WordType> foundTypes;
211 
212  // try filling rolesj
213  for(Token token : mTokens)
214  {
215  if(token.getType() == TokenType::ALPHA) {
216  string wordStr = token.getTokenString();
217  STokenize::capitalize(wordStr);
218  foundTypes = getWordTypes(wordStr);
219  WordList.push_back(Word(token, foundTypes));
220 
221 // cout << wordStr << " : ";
222 // for(auto WordType : foundTypes)
223 // cout << WordStringMap[WordType] << ",";
224 // cout << endl;
225  }
226  }
227  // printing all roles
228 
229  return WordList;
230  }
231 
236  {
237  }
238 
239 } /* NLP */
240 
241 #endif /* !CONVERTER_H */
242 
Definition: stokenize.h:13
Class to process a sentence and generate the list of words and its corresponding data...
Definition: converter.h:28
void setString(const string &newstr)
Converter::setString sets the string to reconvert.
Definition: converter.h:116
const QString DB_PATH
Relative path to the dictionary database. Note: the path is relative to where the "build" folder is gene...
Definition: config.h:13
Definition: word.h:21
static void capitalize(string &s)
Static function that is used to capitalize external string.
Definition: stokenize.cpp:62
vector< Word > getWords()
Function to finalize the list of words to be returned, along with its corresponding roles...
Definition: converter.h:205
Definition: token.h:42
~Converter()
Nothing dynamically allocated to destroy for now.
Definition: converter.h:235