My Project
 All Classes Namespaces Functions Pages
converter.h
1 
8 #ifndef CONVERTER_H
9 #define CONVERTER_H
10 #include <string>
11 #include <list>
12 #include <QtSql>
13 #include <sstream>
14 
15 #include "../Tokenizer/stokenize.h"
16 #include "config.h"
17 #include "word.h"
18 
19 using namespace std;
20 
21 namespace NLP
22 {
23 
27  class Converter
28  {
29  private:
30  string mSentence; // original sentence
31  list<Token> mTokens; // store all essential tokens to be processed
32 
33  static QSqlDatabase mDb;
34 
35  // Helper function
36  void extractTokens();
37  set<WordType> getWordTypes(string wordname, size_t recurDepth = 0);
38 
39  public:
40  Converter(const string& sentence);
41 
42  list<Word> getWords();
43  ~Converter();
44  };
45 
46  QSqlDatabase Converter::mDb = QSqlDatabase::addDatabase("QSQLITE");
47 
54  Converter::Converter(const string &sentence) :
55  mSentence(sentence)
56  {
57  extractTokens(); // extract tokens
58  mDb.setDatabaseName(DB_PATH); // database in main project
59  if( !mDb.open() ) {
60  qDebug() << mDb.lastError();
61  qFatal( "Failed to connect to database." );
62  throw std::invalid_argument("Error: Invalid database");
63  } else { qDebug() << "Debug: Database opened." << endl; }
64  }
65 
66 
75  void Converter::extractTokens()
76  {
77  STokenize mySTokenize(mSentence);
78  while (mySTokenize.More()) {
79  mTokens.push_back(mySTokenize.nextToken());
80  }
81  }
82 
91  set<WordType> Converter::getWordTypes(string wordname, size_t recurDepth)
92  {
93 
95  QSqlQuery mLiteQr;
96  string qrStr = "SELECT DISTINCT wordtype FROM entries WHERE word = '" + wordname + "'";
97  string rawTypesCollections;
98  set<string> uniqueWT; // Create vector to hold our words
99 
101  mLiteQr.prepare(qrStr.c_str());
102  if( !mLiteQr.exec() ) {
103  qDebug() << mLiteQr.lastError();
104  throw std::invalid_argument("Invalid query.");
105  }
106 
108  QSqlRecord rec = mLiteQr.record();
109  while( mLiteQr.next() ) {
110  rawTypesCollections += string(" ");
111  rawTypesCollections += mLiteQr.value(0).toString().toStdString();
112  }
113 
118  if (rawTypesCollections.empty() && wordname.length() > 1 && recurDepth < 3) {
119 // cout << "len WT of " << wordname << " : " << rawTypesCollections.length() << endl;
120 // cout << " calling recursive for : " << wordname << endl;
121  return getWordTypes(wordname.substr(0, wordname.length() - 1), recurDepth + 1);
122  }
123 
125  rawTypesCollections.erase(std::remove(rawTypesCollections.begin(),
126  rawTypesCollections.end(), ',')
127  , rawTypesCollections.end());
128  rawTypesCollections.erase(std::remove(rawTypesCollections.begin(),
129  rawTypesCollections.end(), '&')
130  , rawTypesCollections.end());
131  string buf; // Have a buffer string
132  stringstream ss(rawTypesCollections); // Insert the string into a stream
133 
134  while (ss >> buf)
135  uniqueWT.insert(buf);
136 
138  set<WordType> WordTypes;
139  for(string WT : uniqueWT)
140  WordTypes.insert(WordTypeMap[WT]);
141  return WordTypes;
142  }
143 
148  list<Word> Converter::getWords()
149  {
150  // NOTE : Move database declaration here
151 
152  list<Word> WordList;
153  set <WordType> foundTypes;
154 
155  // try filling rolesj
156  for(Token token : mTokens)
157  {
158  if(token.getType() == TokenType::ALPHA) {
159  string wordStr = token.getTokenString();
160  STokenize::capitalize(wordStr);
161  foundTypes = getWordTypes(wordStr);
162  WordList.push_back(Word(token, foundTypes));
163 
164 // cout << wordStr << " : ";
165 // for(auto WordType : foundTypes)
166 // cout << WordStringMap[WordType] << ",";
167 // cout << endl;
168  }
169  }
170  // printing all roles
171 
172  return WordList;
173  }
174 
179  {
180  }
181 
182 } /* NLP */
183 
184 #endif /* !CONVERTER_H */
185 
list< Word > getWords()
Function to finalize the list of words to be returned, along with its corresponding roles...
Definition: converter.h:148
Definition: stokenize.h:13
Class to process a sentence and generate the list of words and their corresponding data...
Definition: converter.h:27
A Class to store token and its corresponding tags and definitions.
Definition: word.h:25
static void capitalize(string &s)
Static function that is used to capitalize external string.
Definition: stokenize.cpp:62
Definition: token.h:41
~Converter()
Nothing dynamically allocated to destroy for now.
Definition: converter.h:178