15 #include "../Tokenizer/stokenize.h"
16 #include "../../CONFIG/config.h"
// Helper defined further down as Converter::convertAllToLower; presumably
// lower-cases the tokens held in mTokens (its body is not visible in this excerpt).
36 void convertAllToLower();
// Resolves the word types of `wordname` through the dictionary database.
// recurDepth counts the recursive retries made with a shortened word; it
// defaults to 0 for the initial call.
37 set<WordType> getWordTypes(
string wordname,
size_t recurDepth = 0);
// Reduces the raw type text to a set of distinct type names.
// The argument is taken by non-const reference and is modified in place
// (',' and '&' characters are erased from it).
38 set<string> getUnqStringWT(
string &rawTypesCollections);
// Database connection shared by every Converter instance (static member).
41 static QSqlDatabase mDb;
// Sets the string to reconvert.
45 void setString(
const string& newstr);
// Returns the words built from the tokens, each paired with the word types found for it.
46 vector<Word> getWords();
// Definition of the static connection member: registers a connection named
// "ENG_DICT" that uses the "QSQLITE" driver. Because this is the initializer
// of a static data member, the call runs during static initialization.
// NOTE(review): Qt needs a QCoreApplication instance to load SQL driver
// plugins; confirm this initializer is not evaluated before one exists.
50 QSqlDatabase Converter::mDb = QSqlDatabase::addDatabase(
"QSQLITE",
"ENG_DICT");
// Default constructor. Only the failure path is visible in this excerpt; the
// condition guarding it is not shown (presumably a failed attempt to open mDb).
59 Converter::Converter()
// Log the last error recorded on the shared connection.
63 qDebug() << mDb.lastError();
// NOTE(review): with Qt's default message handler qFatal() aborts the
// process, so the throw below would never be reached; confirm which of the
// two failure behaviors is intended.
64 qFatal(
"Failed to connect to database." );
65 throw std::invalid_argument(
"Error: Invalid database");
// Constructor taking the sentence to process. It first delegates to the
// default constructor, so the database check performed there runs before
// anything is done with `sentence` (the body is not visible in this excerpt).
76 Converter::Converter(
const string &sentence) :
Converter()
// Drains the tokenizer: as long as mySTokenize reports more input, its next
// token is appended to mTokens.
91 void Converter::extractTokens()
94 while (mySTokenize.More()) {
95 mTokens.push_back(mySTokenize.nextToken());
// Visits every token in mTokens by mutable reference, so changes are applied
// to the stored tokens themselves. The loop body is not visible in this
// excerpt; presumably it lower-cases each token's text.
105 void Converter::convertAllToLower()
107 for(
Token& tok : mTokens)
// Looks up the word types recorded for `wordname` in the `entries` table.
//
// wordname   - word to look up (matched against the `word` column).
// recurDepth - number of shortening retries already performed; 0 on the
//              initial call.
// Returns the set of WordType values mapped from the distinct `wordtype`
// strings found.
// Throws std::invalid_argument when the query fails to execute.
132 set<WordType> Converter::getWordTypes(
string wordname,
size_t recurDepth)
// Query object bound to the shared static connection.
136 QSqlQuery mLiteQr(this->mDb);
// NOTE(review): wordname is concatenated directly into the SQL text. A word
// containing an apostrophe breaks the statement, and untrusted input could
// inject SQL. Consider a placeholder with QSqlQuery::bindValue instead.
137 string qrStr =
"SELECT DISTINCT wordtype FROM entries WHERE word = '" + wordname +
"'";
138 string rawTypesCollections;
141 mLiteQr.prepare(qrStr.c_str());
// On execution failure, log the query error and report it to the caller.
142 if( !mLiteQr.exec() ) {
143 qDebug() << mLiteQr.lastError();
144 throw std::invalid_argument(
"Invalid query.");
// Concatenate the first column of every result row into one string, each
// entry preceded by a single space.
148 while( mLiteQr.next() ) {
149 rawTypesCollections += string(
" ");
150 rawTypesCollections += mLiteQr.value(0).toString().toStdString();
// Nothing found: retry with the last character removed, provided the word
// is still longer than 2 characters and fewer than 3 retries have been made.
159 if (rawTypesCollections.empty() && wordname.length() > 2 && recurDepth < 3) {
160 return getWordTypes(wordname.substr(0, wordname.length() - 1), recurDepth + 1);
161 }
// Retries exhausted or word too short: build a set containing only
// WordType::noun (the return statement is not visible in this excerpt;
// presumably this set is returned as the fallback).
else if (rawTypesCollections.empty () && (recurDepth > 2 || wordname.length () <= 2)){
162 set<WordType> nameEntity;
163 nameEntity.insert (WordType::noun);
// Reduce the collected text to distinct type names. This call also strips
// ',' and '&' from rawTypesCollections in place.
168 set<string> uniqueWT = getUnqStringWT (rawTypesCollections);
171 set<WordType> WordTypes;
// Translate each type name through WordTypeMap.
// NOTE(review): if WordTypeMap is a std::map, operator[] inserts a
// default-constructed entry for names it does not contain - TODO confirm.
172 for(
string WT : uniqueWT)
173 WordTypes.insert(WordTypeMap[WT]);
// Turns the raw word-type text into a set of distinct type names.
//
// rawTypesCollections - text to process; modified in place, because every
//                       ',' and '&' character is erased from it.
// Returns the set that receives the extracted names (`uniqueWT`).
183 set<string> Converter::getUnqStringWT(
string& rawTypesCollections)
// Erase-remove: drop every ',' from the string.
186 rawTypesCollections.erase(std::remove(rawTypesCollections.begin()
187 , rawTypesCollections.end(),
',')
188 , rawTypesCollections.end());
// Erase-remove: drop every '&' from the string.
189 rawTypesCollections.erase(std::remove(rawTypesCollections.begin(),
190 rawTypesCollections.end(),
'&')
191 , rawTypesCollections.end());
// Stream over the cleaned text. The extraction loop is not visible in this
// excerpt; presumably it reads whitespace-separated pieces into `buf`.
193 stringstream ss(rawTypesCollections);
195 set<string> uniqueWT;
// Inserting into a set discards duplicate names.
197 uniqueWT.insert(buf);
// Body fragment of getWords (its signature line is not visible in this excerpt).
// WordList accumulates the result; foundTypes is reused for each lookup.
209 vector<Word> WordList;
210 set <WordType> foundTypes;
// Each token is copied into `token` for the iteration.
213 for(
Token token : mTokens)
// Only tokens of type ALPHA are looked up and turned into Word entries.
215 if(token.getType() == TokenType::ALPHA) {
216 string wordStr = token.getTokenString();
// Resolve the word types for this token's text (recurDepth takes its default).
218 foundTypes = getWordTypes(wordStr);
// Pair the token with the types found for it.
219 WordList.push_back(
Word(token, foundTypes));
Definition: stokenize.h:13
Class to process a sentence and generate the list of words and its corresponding data...
Definition: converter.h:28
void setString(const string &newstr)
Converter::setString sets the string to reconvert.
Definition: converter.h:116
const QString DB_PATH
Relative path to the dictionary database. Note: the path is relative to where the "build" folder is gene...
Definition: config.h:13
static void capitalize(string &s)
Static function that capitalizes an external string.
Definition: stokenize.cpp:62
vector< Word > getWords()
Function to finalize the list of words to be returned, along with its corresponding roles...
Definition: converter.h:205
~Converter()
Nothing dynamically allocated to destroy for now.
Definition: converter.h:235