dictengine.cpp 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. #include <QFile>
  2. #include <QTextStream>
  3. #include <QString>
  4. #include <QStringList>
  5. #include <QtDebug>
  6. #include "dictengine.h"
  7. #include "base64.h"
  8. DictEngine::DictEngine(QString path, QString name, QObject *parent) :
  9. QObject(parent), datName(name)
  10. {
  11. QString indexFilePath = path + "/" + name + ".index";
  12. indexFileHandle = new QFile(indexFilePath);
  13. if(!indexFileHandle->open(QIODevice::ReadOnly | QIODevice::Text))
  14. throw QString(tr("File %1 could not be opened.").arg(indexFilePath));
  15. streamIndex = new QTextStream();
  16. streamIndex->setDevice(indexFileHandle);
  17. streamIndex->setCodec("UTF-8");
  18. indexEndPos = indexFileHandle->size() - 1;
  19. QString dictFilePath = path + "/" + name + ".dict";
  20. dictFileHandle = new QFile(dictFilePath);
  21. if(!dictFileHandle->open(QIODevice::ReadOnly | QIODevice::Text))
  22. throw QString(tr("File %1 could not be opened.").arg(dictFilePath));
  23. streamDict = new QTextStream();
  24. streamDict->setDevice(dictFileHandle);
  25. streamDict->setCodec("UTF-8");
  26. sugg = new QList<DictRecord>();
  27. }
  28. DictEngine::~DictEngine() {
  29. delete streamIndex;
  30. delete indexFileHandle;
  31. delete streamDict;
  32. delete dictFileHandle;
  33. delete sugg;
  34. }
  35. qint64 DictEngine::findFirstEntry(QString query) const
  36. {
  37. qint64 low = 0, high = indexEndPos, pos;
  38. query = locToASCII(query);
  39. int length = query.length();
  40. QStringList sl;
  41. int cmp;
  42. /* perform binary search in the .index file */
  43. do {
  44. pos = (low + high)/2;
  45. sl = entryAt(pos);
  46. cmp = QString::compare(query, locToASCII(sl[0].left(length)), Qt::CaseInsensitive);
  47. if(low == pos || high == pos) /* no hits */
  48. return -1;
  49. if(cmp < 0) { /* a ... query ... sl[0] */
  50. high = pos;
  51. }
  52. else if(cmp > 0) { /* a ... sl[0] ... query */
  53. low = pos;
  54. }
  55. } while(cmp != 0);
  56. /* go up to find a non-matching entry */
  57. while(QString::compare(query, locToASCII(sl[0].left(length)), Qt::CaseInsensitive) == 0)
  58. {
  59. pos -= max_index_line_length;
  60. sl = entryAt(pos);
  61. }
  62. /* find a matching entry (this ensures it's the first match in the file) */
  63. QString str;
  64. while(QString::compare(query, locToASCII(str.left(length)), Qt::CaseInsensitive) != 0)
  65. {
  66. pos = streamIndex->pos();
  67. str = streamIndex->readLine();
  68. }
  69. return pos;
  70. }
  71. QStringList DictEngine::entryAt(qint64 pos) const /* returns next whole line after closest newline */
  72. {
  73. streamIndex->seek(pos);
  74. streamIndex->readLine();
  75. return streamIndex->readLine().split("\t");
  76. }
  77. QStringList DictEngine::suggestions(QString query)
  78. {
  79. // qDebug() << "sugg: " + __LINE__;
  80. sugg->clear();
  81. qint64 pos = findFirstEntry(query);
  82. if(pos == -1) {
  83. return QStringList();
  84. }
  85. streamIndex->seek(pos);
  86. //DictRecord entry_old, entry;
  87. QStringList entry_old(QString("")), entry;
  88. while(sugg->length() < number_of_suggestions) { /* make list of suggestions */
  89. entry = streamIndex->readLine().split("\t");
  90. if(entry_old[0] != entry[0]) /* do not append if the previous heading is the same */
  91. sugg->append(DictRecord(entry[0], entry[1], entry[2]));
  92. else /* add position and length of the new occurence of the word */
  93. (*sugg)[(sugg->length() - 1)].occ.append(DictOccurrence(entry[1], entry[2]));
  94. entry_old = entry;
  95. }
  96. QStringList retVal;
  97. foreach(DictRecord dr, *sugg)
  98. retVal.append(dr.heading);
  99. // qDebug() << "sugg: " + __LINE__;
  100. return retVal;
  101. }
  102. QString DictEngine::meaning(int index)
  103. {
  104. if(sugg->length() == 0)
  105. return QString(tr("No hits"));
  106. int headingLen = sugg->at(index).heading.toUtf8().length() + 1; /* number of bytes in heading plus the newline character */
  107. QString str, item;
  108. // item.reserve(1000);
  109. str = "<h3>" + sugg->at(index).heading + "</h3>";
  110. int len;
  111. const QList<DictOccurrence> &doL = sugg->at(index).occ;
  112. for(int i = 0; i < doL.length(); i++) {
  113. // foreach(DictOccurrence do, doL) {
  114. str += QString("<p><b>%1: </b>").arg(i + 1);
  115. streamDict->seek(base64toInt(doL[i].pos) + headingLen);
  116. len = base64toInt(doL[i].length) - headingLen;
  117. item = streamDict->read(len);
  118. item.truncate(2*len - item.toUtf8().length()); /* get rid of unintentionally read extra characters from the next entry */
  119. str += item + "</p>";
  120. }
  121. return str;
  122. }
  123. QString DictEngine::name() const
  124. {
  125. QString line;
  126. streamDict->seek(0);
  127. int i = 0; /* search for 00-database-short, if it's not found on */
  128. /* first lines, return file name without extension. */
  129. while((line != "00-database-short") && (i++ < 20)) {
  130. line = streamDict->readLine();
  131. }
  132. if(i == 20)
  133. return datName;
  134. return streamDict->readLine().trimmed();
  135. }
  136. QString DictEngine::databaseName() const
  137. {
  138. return datName;
  139. }
  140. qint64 DictEngine::base64toInt(QString str) const
  141. {
  142. int c;
  143. qint64 val = 0;
  144. for(int i = 0; (i < str.length()) && (i < sizeof(qint64)); i++) {
  145. c = static_cast<int>(str.at(str.length() - i - 1).toAscii());
  146. if((c & 0x80) || (base64_val[c] == -1))
  147. throw QString("Non-Base64 character was encountered.");
  148. val |= base64_val[c] << (i * 6);
  149. }
  150. if(val >= dictFileHandle->size())
  151. throw QString("Attempt to access dict file beyond its end.");
  152. return val;
  153. }
  154. QString DictEngine::locToASCII(QString input) const
  155. {
  156. return input;
  157. QString sharp_s = QString::fromLocal8Bit("ß");
  158. QString norm = input.normalized(QString::NormalizationForm_KD);
  159. QString output;
  160. output.reserve(input.length()*2); /* the highest possible needed size is <input length>*2 */
  161. foreach(const QChar& c, norm) {
  162. if(c < 0x80)
  163. output.append(c);
  164. else if(c == sharp_s.at(0))
  165. output.append('s');
  166. }
  167. return output;
  168. }