00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015
00016 #include <fcntl.h>
00017
00018 #include "WordDict.h"
00019 #include "WordListOne.h"
00020
00021 #define WORD_DICT_CURSOR_FIRST 1
00022 #define WORD_DICT_CURSOR_NEXT 2
00023
00024 class WordDictCursor {
00025 public:
00026 int info;
00027 String prefix;
00028 WordDBCursor* cursor;
00029 };
00030
00031 WordDict::~WordDict()
00032 {
00033 delete db;
00034 }
00035
00036 int WordDict::Initialize(WordList* nwords)
00037 {
00038 words = nwords;
00039 db = new WordDB(nwords->GetContext()->GetDBInfo());
00040 return OK;
00041 }
00042
00043 int WordDict::Open()
00044 {
00045 const String& filename = words->Filename();
00046 int flags = words->Flags();
00047
00048 db->set_pagesize(words->Pagesize());
00049
00050 return db->Open(filename, "dict", DB_BTREE, flags, 0666, WORD_DB_DICT) == 0 ? OK : NOTOK;
00051 }
00052
00053 int WordDict::Remove()
00054 {
00055 return db->Remove(words->Filename(), "dict") == 0 ? OK : NOTOK;
00056 }
00057
00058 int WordDict::Close()
00059 {
00060 return db->Close() == 0 ? OK : NOTOK;
00061 }
00062
00063 int WordDict::Serial(const String& word, unsigned int& serial)
00064 {
00065 int ret;
00066 WordDictRecord entry;
00067 if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00068 return NOTOK;
00069 if(ret == DB_NOTFOUND) {
00070 words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
00071 if(entry.Put(db, word) != 0) return NOTOK;
00072 }
00073 serial = entry.id;
00074
00075 return OK;
00076 }
00077
00078 int WordDict::SerialExists(const String& word, unsigned int& serial)
00079 {
00080 int ret;
00081 WordDictRecord entry;
00082 if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00083 return NOTOK;
00084
00085 serial = ret == DB_NOTFOUND ? WORD_DICT_SERIAL_INVALID : entry.id;
00086
00087 return OK;
00088 }
00089
00090 int WordDict::SerialRef(const String& word, unsigned int& serial)
00091 {
00092 int ret;
00093 WordDictRecord entry;
00094 if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00095 return NOTOK;
00096 if(ret == DB_NOTFOUND)
00097 words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
00098 entry.count++;
00099 if(entry.Put(db, word) != 0) return NOTOK;
00100 serial = entry.id;
00101
00102 return OK;
00103 }
00104
00105 int WordDict::Noccurrence(const String& word, unsigned int& noccurrence) const
00106 {
00107 if(word.empty()) {
00108 fprintf(stderr, "WordDict::Noccurrence: null word\n");
00109 return NOTOK;
00110 }
00111 WordDictRecord entry;
00112 noccurrence = 0;
00113 int ret;
00114 if((ret = entry.Get(db, word)) != 0) {
00115 if(ret != DB_NOTFOUND)
00116 return NOTOK;
00117 }
00118 noccurrence = entry.count;
00119
00120 return OK;
00121 }
00122
00123 int WordDict::Normalize(String& word) const
00124 {
00125 const WordType& wtype = words->GetContext()->GetType();
00126
00127 return wtype.Normalize(word);
00128 }
00129
00130 int WordDict::Incr(const String& word, unsigned int incr)
00131 {
00132 int ret;
00133 WordDictRecord entry;
00134 if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00135 return NOTOK;
00136 if(ret == DB_NOTFOUND)
00137 words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
00138 entry.count += incr;
00139 if(entry.Put(db, word) != 0) return NOTOK;
00140 return OK;
00141 }
00142
00143 int WordDict::Decr(const String& word, unsigned int decr)
00144 {
00145 WordDictRecord entry;
00146 int ret;
00147 if((ret = entry.Get(db, word)) != 0) {
00148 if(ret == DB_NOTFOUND)
00149 fprintf(stderr, "WordDict::Unref(%s) Unref on non existing word occurrence\n", (const char*)word);
00150 return NOTOK;
00151 }
00152 entry.count -= decr;
00153 if(entry.count > 0)
00154 ret = entry.Put(db, word) == 0 ? OK : NOTOK;
00155 else
00156 ret = entry.Del(db, word) == 0 ? OK : NOTOK;
00157
00158 return ret;
00159 }
00160
00161 int WordDict::Put(const String& word, unsigned int noccurrence)
00162 {
00163 int ret;
00164 WordDictRecord entry;
00165 if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00166 return NOTOK;
00167 if(ret == DB_NOTFOUND)
00168 words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
00169 entry.count = noccurrence;
00170 if(entry.Put(db, word) != 0) return NOTOK;
00171 return OK;
00172 }
00173
00174 List *WordDict::Words() const
00175 {
00176 String key;
00177 String coded;
00178 WordDBCursor* cursor = db->Cursor();
00179 List* list = new List;
00180
00181 while(cursor->Get(key, coded, DB_NEXT) == 0)
00182 list->Add(new String(key));
00183
00184 delete cursor;
00185
00186 return list;
00187 }
00188
00189 int WordDict::Exists(const String& word) const
00190 {
00191 String tmp_word = word;
00192 String coded;
00193
00194 return db->Get(0, tmp_word, coded, 0) == 0;
00195 }
00196
00197 WordDictCursor* WordDict::Cursor() const
00198 {
00199 WordDictCursor* cursor = new WordDictCursor;
00200 cursor->cursor = db->Cursor();
00201
00202 return cursor;
00203 }
00204
00205 int WordDict::Next(WordDictCursor* cursor, String& word, WordDictRecord& record)
00206 {
00207 String coded;
00208 int ret = cursor->cursor->Get(word, coded, DB_NEXT);
00209 if(ret != 0) {
00210 delete cursor->cursor;
00211 delete cursor;
00212 } else {
00213 record.Unpack(coded);
00214 }
00215 return ret;
00216 }
00217
00218 WordDictCursor* WordDict::CursorPrefix(const String& prefix) const
00219 {
00220 WordDictCursor* cursor = new WordDictCursor;
00221 cursor->cursor = db->Cursor();
00222 cursor->prefix = prefix;
00223 cursor->info = WORD_DICT_CURSOR_FIRST;
00224
00225 return cursor;
00226 }
00227
00228 int WordDict::NextPrefix(WordDictCursor* cursor, String& word, WordDictRecord& record)
00229 {
00230 String coded;
00231 int ret;
00232 if(cursor->info == WORD_DICT_CURSOR_FIRST) {
00233 word = cursor->prefix;
00234 ret = cursor->cursor->Get(word, coded, DB_SET_RANGE);
00235 cursor->info = WORD_DICT_CURSOR_NEXT;
00236 } else {
00237 ret = cursor->cursor->Get(word, coded, DB_NEXT);
00238 }
00239
00240
00241
00242
00243
00244 if(ret != 0 ||
00245 cursor->prefix.length() > word.length() ||
00246 strncmp(cursor->prefix.get(), word.get(), cursor->prefix.length())) {
00247 delete cursor->cursor;
00248 delete cursor;
00249 if(ret == 0) ret = DB_NOTFOUND;
00250 } else {
00251 record.Unpack(coded);
00252 }
00253 return ret;
00254 }
00255
00256 int WordDict::Write(FILE* f)
00257 {
00258 WordDBCursor* cursor = db->Cursor();
00259 String key;
00260 String coded;
00261 unsigned int occurrence;
00262 unsigned int id;
00263
00264 while(cursor->Get(key, coded, DB_NEXT) == 0) {
00265 int offset = 0;
00266 coded.ber_shift(offset, occurrence);
00267 coded.ber_shift(offset, id);
00268 fprintf(f, "%s %d %d\n", (char*)key, id, occurrence);
00269 }
00270
00271 delete cursor;
00272
00273 return OK;
00274 }