WordDict.cc

Go to the documentation of this file.
00001 //
00002 // WordDict.cc
00003 //
00004 // Part of the ht://Dig package   <http://www.htdig.org/>
00005 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
00006 // For copyright details, see the file COPYING in your distribution
00007 // or the GNU General Public License version 2 or later
00008 // <http://www.gnu.org/copyleft/gpl.html>
00009 //
00010 // $Id: WordDict_8cc-source.html,v 1.1 2008/06/08 10:13:08 sebdiaz Exp $
00011 //
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif /* HAVE_CONFIG_H */
00015 
00016 #include <fcntl.h>
00017 
00018 #include "WordDict.h"
00019 #include "WordListOne.h"
00020 
//
// Values stored in WordDictCursor::info while walking words that
// share a prefix: FIRST means the cursor has not been positioned yet,
// NEXT means it has been positioned and only needs to be advanced.
// Declared as an enum instead of macros so that the names obey the
// usual C++ scoping rules.
//
enum {
  WORD_DICT_CURSOR_FIRST = 1,
  WORD_DICT_CURSOR_NEXT  = 2
};
00023 
00024 class WordDictCursor {
00025 public:
00026   int info;
00027   String prefix;
00028   WordDBCursor* cursor;
00029 };
00030 
00031 WordDict::~WordDict()
00032 {
00033   delete db;
00034 }
00035 
00036 int WordDict::Initialize(WordList* nwords)
00037 {
00038   words = nwords;
00039   db = new WordDB(nwords->GetContext()->GetDBInfo());
00040   return OK;
00041 }
00042 
00043 int WordDict::Open()
00044 {
00045   const String& filename = words->Filename();
00046   int flags = words->Flags();
00047 
00048   db->set_pagesize(words->Pagesize());
00049 
00050   return db->Open(filename, "dict", DB_BTREE, flags, 0666, WORD_DB_DICT) == 0 ? OK : NOTOK;
00051 }
00052 
00053 int WordDict::Remove()
00054 {
00055   return db->Remove(words->Filename(), "dict") == 0 ? OK : NOTOK;
00056 }
00057 
00058 int WordDict::Close()
00059 {
00060   return db->Close() == 0 ? OK : NOTOK;
00061 }
00062 
00063 int WordDict::Serial(const String& word, unsigned int& serial)
00064 {
00065   int ret;
00066   WordDictRecord entry;
00067   if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00068     return NOTOK;
00069   if(ret == DB_NOTFOUND) {
00070     words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
00071     if(entry.Put(db, word) != 0) return NOTOK;
00072   }
00073   serial = entry.id;
00074 
00075   return OK;
00076 }
00077 
00078 int WordDict::SerialExists(const String& word, unsigned int& serial)
00079 {
00080   int ret;
00081   WordDictRecord entry;
00082   if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00083     return NOTOK;
00084 
00085   serial = ret == DB_NOTFOUND ? WORD_DICT_SERIAL_INVALID : entry.id;
00086 
00087   return OK;
00088 }
00089 
00090 int WordDict::SerialRef(const String& word, unsigned int& serial)
00091 {
00092   int ret;
00093   WordDictRecord entry;
00094   if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00095     return NOTOK;
00096   if(ret == DB_NOTFOUND)
00097     words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
00098   entry.count++;
00099   if(entry.Put(db, word) != 0) return NOTOK;
00100   serial = entry.id;
00101 
00102   return OK;
00103 }
00104 
00105 int WordDict::Noccurrence(const String& word, unsigned int& noccurrence) const
00106 {
00107   if(word.empty()) {
00108     fprintf(stderr, "WordDict::Noccurrence: null word\n");
00109     return NOTOK;
00110   }
00111   WordDictRecord entry;
00112   noccurrence = 0;
00113   int ret;
00114   if((ret = entry.Get(db, word)) != 0) {
00115     if(ret != DB_NOTFOUND)
00116       return NOTOK;
00117   }
00118   noccurrence = entry.count;
00119 
00120   return OK;
00121 }
00122 
00123 int WordDict::Normalize(String& word) const
00124 {
00125   const WordType& wtype = words->GetContext()->GetType();
00126 
00127   return wtype.Normalize(word);
00128 }
00129 
00130 int WordDict::Incr(const String& word, unsigned int incr)
00131 {
00132   int ret;
00133   WordDictRecord entry;
00134   if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00135     return NOTOK;
00136   if(ret == DB_NOTFOUND)
00137     words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
00138   entry.count += incr;
00139   if(entry.Put(db, word) != 0) return NOTOK;
00140   return OK;
00141 }
00142 
00143 int WordDict::Decr(const String& word, unsigned int decr)
00144 {
00145   WordDictRecord entry;
00146   int ret;
00147   if((ret = entry.Get(db, word)) != 0) {
00148     if(ret == DB_NOTFOUND)
00149       fprintf(stderr, "WordDict::Unref(%s) Unref on non existing word occurrence\n", (const char*)word);
00150     return NOTOK;
00151   }
00152   entry.count -= decr;
00153   if(entry.count > 0)
00154     ret = entry.Put(db, word) == 0 ? OK : NOTOK;
00155   else
00156     ret = entry.Del(db, word) == 0 ? OK : NOTOK;
00157 
00158   return ret;
00159 }
00160 
00161 int WordDict::Put(const String& word, unsigned int noccurrence)
00162 {
00163   int ret;
00164   WordDictRecord entry;
00165   if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
00166     return NOTOK;
00167   if(ret == DB_NOTFOUND)
00168     words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
00169   entry.count = noccurrence;
00170   if(entry.Put(db, word) != 0) return NOTOK;
00171   return OK;
00172 }
00173 
00174 List *WordDict::Words() const
00175 {
00176   String key;
00177   String coded;
00178   WordDBCursor* cursor = db->Cursor();
00179   List* list = new List;
00180 
00181   while(cursor->Get(key, coded, DB_NEXT) == 0)
00182     list->Add(new String(key));
00183 
00184   delete cursor;
00185 
00186   return list;
00187 }
00188 
00189 int WordDict::Exists(const String& word) const
00190 {
00191   String tmp_word = word;
00192   String coded;
00193   
00194   return db->Get(0, tmp_word, coded, 0) == 0;
00195 }
00196 
00197 WordDictCursor* WordDict::Cursor() const
00198 {
00199   WordDictCursor* cursor = new WordDictCursor;
00200   cursor->cursor = db->Cursor();
00201 
00202   return cursor;
00203 }
00204 
00205 int WordDict::Next(WordDictCursor* cursor, String& word, WordDictRecord& record)
00206 {
00207   String coded;
00208   int ret = cursor->cursor->Get(word, coded, DB_NEXT);
00209   if(ret != 0) {
00210     delete cursor->cursor;
00211     delete cursor;
00212   } else {
00213     record.Unpack(coded);
00214   }
00215   return ret;
00216 }
00217 
00218 WordDictCursor* WordDict::CursorPrefix(const String& prefix) const
00219 {
00220   WordDictCursor* cursor = new WordDictCursor;
00221   cursor->cursor = db->Cursor();
00222   cursor->prefix = prefix;
00223   cursor->info = WORD_DICT_CURSOR_FIRST;
00224 
00225   return cursor;
00226 }
00227 
00228 int WordDict::NextPrefix(WordDictCursor* cursor, String& word, WordDictRecord& record)
00229 {
00230   String coded;
00231   int ret;
00232   if(cursor->info == WORD_DICT_CURSOR_FIRST) {
00233     word = cursor->prefix;
00234     ret = cursor->cursor->Get(word, coded, DB_SET_RANGE);
00235     cursor->info = WORD_DICT_CURSOR_NEXT;
00236   } else {
00237     ret = cursor->cursor->Get(word, coded, DB_NEXT);
00238   }
00239   //
00240   // Stop walking when 1) DB_NOTFOUND, 2) the word found is shorter than
00241   // the required prefix, 3) the word found does not start with the 
00242   // required prefix.
00243   //
00244   if(ret != 0 ||
00245      cursor->prefix.length() > word.length() ||
00246      strncmp(cursor->prefix.get(), word.get(), cursor->prefix.length())) {
00247     delete cursor->cursor;
00248     delete cursor;
00249     if(ret == 0) ret = DB_NOTFOUND;
00250   } else {
00251     record.Unpack(coded);
00252   }
00253   return ret;
00254 }
00255 
00256 int WordDict::Write(FILE* f)
00257 {
00258   WordDBCursor* cursor = db->Cursor();
00259   String key;
00260   String coded;
00261   unsigned int occurrence;
00262   unsigned int id;
00263 
00264   while(cursor->Get(key, coded, DB_NEXT) == 0) {
00265     int offset = 0;
00266     coded.ber_shift(offset, occurrence);
00267     coded.ber_shift(offset, id);
00268     fprintf(f, "%s %d %d\n", (char*)key, id, occurrence);
00269   }
00270 
00271   delete cursor;
00272 
00273   return OK;
00274 }

Generated on Sun Jun 8 10:56:40 2008 for GNUmifluz by  doxygen 1.5.5