00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #ifndef _WordTree_h
00036 #define _WordTree_h
00037
00038 #include <WordList.h>
00039
00040 #include <WordKeySemantic.h>
00041 #include <WordPermute.h>
00042 #include <WordCursorOne.h>
00043 #include <WordResults.h>
00044
00045 #define WORD_WALK_REDO 0x1000
00046 #define WORD_WALK_RESTART 0x2000
00047 #define WORD_WALK_NEXT 0x4000
00048 #define WORD_WALK_END_CACHE 0x8000
00049
00050
00051
00052
00053 #define WORD_SEARCH_NOPROXIMITY 1
00054
00055
00056
00057
00058 #define WORD_TREE_OR 1
00059 #define WORD_TREE_AND 2
00060 #define WORD_TREE_NEAR 3
00061 #define WORD_TREE_OPTIONAL 4
00062 #define WORD_TREE_LITERAL 5
00063 #define WORD_TREE_MANDATORY 6
00064 #define WORD_TREE_NOT 7
00065
00066 #define WORD_TREE_OP_SIZE 20
00067
00068
00069
00070
00071 #ifndef WORD_SEARCH_DEFAULT_PROXIMITY
00072 #define WORD_SEARCH_DEFAULT_PROXIMITY (-1)
00073 #endif
00074
00075 class WordTreeArg {
00076 public:
00077 WordTreeArg(WordList *words, int uniq, int has_uniq, int nproximity, int *document, int document_length, int location) {
00078 _words = words;
00079 _uniq = uniq;
00080 _has_uniq = has_uniq;
00081 _nproximity = nproximity;
00082 _document = document;
00083 _document_length = document_length;
00084 _location = location;
00085 _realm = 0;
00086 }
00087
00088 WordList *_words;
00089 int _uniq;
00090 int _has_uniq;
00091 int _nproximity;
00092 int *_document;
00093 int _document_length;
00094 int _location;
00095 int _realm;
00096 };
00097
00098 class WordTree : public WordCursorOne {
00099 public:
00100 WordTree(WordList* words) :
00101 WordCursorOne(words),
00102 key_semantic(words->GetContext())
00103 {
00104 proximity = 0;
00105 has_uniq = 0;
00106 results = 0;
00107 verbose = 0;
00108 }
00109
00110 virtual int ContextSaveList(StringList& list) const {
00111 return OK;
00112 }
00113
00114 virtual int ContextRestoreList(StringList& list) {
00115 return OK;
00116 }
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128 virtual int Prepare(WordTreeArg& arg) {
00129 int ret;
00130 proximity = arg._nproximity;
00131 has_uniq = arg._has_uniq;
00132 if((ret = key_semantic.Initialize(arg._document, arg._document_length, arg._location, arg._uniq)) != OK)
00133 return ret;
00134 key_semantic.Verbose(verbose);
00135 WordKey key(arg._words->GetContext());
00136
00137
00138
00139 if(!scope.empty()) {
00140 if(key.Set(scope) != OK) {
00141 fprintf(stderr, "WordTree::Prepare: setting scope %s failed\n", (char*)scope);
00142 return NOTOK;
00143 }
00144 }
00145
00146
00147
00148 if(!search.empty()) {
00149 unsigned int wordid = WORD_DICT_SERIAL_INVALID;
00150 if(arg._words->Dict()->SerialExists(search, wordid) != OK)
00151 return NOTOK;
00152 key.Set(WORD_KEY_WORD, wordid);
00153 }
00154 return WordCursorOne::Initialize(arg._words, key, 0, 0, HTDIG_WORDLIST_WALKER);
00155 }
00156
00157 virtual int WalkNextExclude(const WordKey& key);
00158
00159 virtual int Bounds(const WordKey& bottom, const WordKey& top) = 0;
00160
00161
00162
00163
00164 WordKey GetDocument() {
00165 WordKey found(words->GetContext());
00166 key_semantic.DocumentSet(GetFound().Key(), found);
00167 return found;
00168 }
00169
00170
00171
00172
00173 virtual int SetRealm(const WordKey& realm) {
00174 key_semantic.RealmCopy(realm, GetSearch());
00175 return OK;
00176 }
00177
00178
00179
00180 virtual int UndefinedRealm() {
00181 key_semantic.RealmUndefined(GetSearch());
00182 return OK;
00183 }
00184
00185
00186
00187 int ClearRealm() {
00188 WordKey realm(words->GetContext());
00189 key_semantic.RealmClear(realm);
00190 return SetRealm(realm);
00191 }
00192
00193
00194
00195
00196 virtual int Count(unsigned int& count) const { return Noccurrence(count); }
00197
00198 virtual int Noccurrence(unsigned int& noccurrence) const { return words->Noccurrence(search, noccurrence); }
00199
00200
00201
00202
00203
00204 virtual void SetInfo() { info = search; }
00205
00206
00207
00208
00209
00210 String GetInfo() { return info; }
00211
00212
00213
00214
00215 virtual inline int SetResults(WordResults* nresults) { results = nresults; return OK; }
00216 virtual inline WordResults* GetResults() { return results; }
00217
00218
00219
00220
00221
00222 virtual int AscendingFrequency() { return OK; }
00223
00224
00225
00226
00227
00228
00229 virtual int StripNonExistent(unsigned int& stripped, unsigned int& killed_mandatory) {
00230 stripped = 0;
00231 killed_mandatory = 0;
00232 return OK;
00233 }
00234
00235
00236
00237
00238
00239
00240 static int TopLevelOptimize(WordTree*& expr);
00241
00242 inline int Verbose(int verbosity) { return verbose = verbosity; }
00243
00244
00245
00246
00247
00248
00249
00250 int proximity;
00251
00252
00253
00254
00255 int has_uniq;
00256
00257
00258
00259 WordKeySemantic key_semantic;
00260
00261
00262
00263 String scope;
00264
00265
00266
00267
00268 String search;
00269
00270
00271
00272
00273
00274
00275
00276 String info;
00277
00278
00279
00280 WordResults *results;
00281 int verbose;
00282 };
00283
00284
00285
00286 class WordTreeLiteral : public WordTree {
00287 public:
00288
00289
00290
00291
00292 WordTreeLiteral(WordList* words, const char* string, int string_length, const char* nscope = "") :
00293 WordTree(words),
00294 bottom_document(words->GetContext()),
00295 top_document(words->GetContext())
00296 {
00297 search.set(string, string_length);
00298 scope.set((char*)nscope);
00299 }
00300
00301
00302
00303
00304 int IsA() const { return WORD_TREE_LITERAL; }
00305
00306 virtual int WalkInit();
00307 virtual int WalkRewind();
00308
00309
00310
00311 virtual int WalkNext();
00312 virtual int Seek(const WordKey& patch);
00313
00314
00315
00316
00317
00318
00319
00320
00321 virtual int Get(String& bufferout) const;
00322
00323 virtual int Bounds(const WordKey& bottom, const WordKey& top) {
00324 bottom_document = bottom;
00325 top_document = top;
00326 return OK;
00327 }
00328
00329 protected:
00330 WordKey bottom_document;
00331 WordKey top_document;
00332 };
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366 class WordTreeOperand : public WordTree
00367 {
00368 public:
00369
00370
00371
00372 WordTreeOperand(WordList* words, const char* nscope) :
00373 WordTree(words),
00374 pos(words->GetContext())
00375 {
00376 scope.set((char*)nscope);
00377 }
00378
00379
00380
00381
00382 virtual ~WordTreeOperand();
00383
00384 virtual void Clear() {
00385 cursors = 0;
00386 cursors_length = 0;
00387 WordCursorOne::Clear();
00388 }
00389
00390
00391
00392
00393 virtual int Optimize();
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410 int OptimizeOr(int proximity);
00411
00412 virtual int ContextSave(String& buffer) const {
00413 StringList list;
00414 int ret;
00415 if((ret = ContextSaveList(list)) != OK)
00416 return ret;
00417
00418 buffer.trunc();
00419 String* element;
00420 list.Start_Get();
00421 while((element = (String*)list.Get_Next())) {
00422 buffer << (*element) << ';';
00423 }
00424
00425
00426
00427 buffer.chop(1);
00428
00429 return OK;
00430 }
00431
00432 virtual int ContextSaveList(StringList& list) const {
00433
00434
00435
00436 unsigned int i;
00437 for(i = 0; i < cursors_length; i++)
00438 if(cursors[i]->ContextSaveList(list) == NOTOK)
00439 return NOTOK;
00440 return OK;
00441 }
00442
00443 virtual int ContextRestore(const String& buffer) {
00444 if(!buffer.empty()) {
00445 StringList list(buffer, ";");
00446 return ContextRestoreList(list);
00447 } else {
00448 return OK;
00449 }
00450 }
00451
00452 virtual int ContextRestoreList(StringList& list) {
00453
00454
00455
00456 unsigned int i;
00457 for(i = 0; i < cursors_length; i++)
00458 if(cursors[i]->ContextRestoreList(list) == NOTOK)
00459 return NOTOK;
00460 return OK;
00461 }
00462
00463 virtual int SetResults(WordResults* nresults) {
00464
00465
00466
00467 unsigned int i;
00468 for(i = 0; i < cursors_length; i++)
00469 if(cursors[i]->SetResults(nresults) == NOTOK)
00470 return NOTOK;
00471 return WordTree::SetResults(nresults);
00472 }
00473
00474
00475
00476
00477 virtual int WalkInit();
00478
00479
00480
00481
00482 virtual int WalkRewind();
00483
00484
00485
00486 virtual int WalkFinish();
00487
00488
00489
00490
00491
00492 virtual int Seek(const WordKey& patch);
00493
00494
00495
00496
00497
00498 virtual int Noccurrence(unsigned int& noccurrence) const {
00499 noccurrence = 0;
00500 unsigned int i;
00501 for(i = 0; i < cursors_length; i++) {
00502 unsigned int frequency;
00503 if(cursors[i]->Noccurrence(frequency) != OK)
00504 return NOTOK;
00505 noccurrence += frequency;
00506 }
00507 return OK;
00508 }
00509
00510
00511
00512
00513
00514 virtual int Get(String& bufferout) const;
00515
00516
00517
00518
00519 virtual int Prepare(WordTreeArg& arg) {
00520 int ret;
00521 if((ret = WordTree::Prepare(arg)) != OK)
00522 return ret;
00523 unsigned int i;
00524 for(i = 0; i < cursors_length; i++) {
00525 cursors[i]->Verbose(verbose);
00526 if((ret = cursors[i]->Prepare(arg)) != OK)
00527 return ret;
00528 }
00529 return Get(search);
00530 }
00531
00532 virtual int Bounds(const WordKey& bottom, const WordKey& top) {
00533 int ret;
00534 unsigned int i;
00535 for(i = 0; i < cursors_length; i++) {
00536 if((ret = cursors[i]->Bounds(bottom, top)) != OK)
00537 return ret;
00538 }
00539 return OK;
00540 }
00541
00542
00543
00544
00545
00546
00547 WordKey pos;
00548
00549
00550
00551 WordTree** cursors;
00552
00553
00554
00555 unsigned int cursors_length;
00556
00557
00558
00559 WordPermute permutation;
00560 };
00561
00562
00563
00564 class WordTreeOptional : public WordTreeOperand {
00565 public:
00566 WordTreeOptional(WordList* words, const char* nscope) : WordTreeOperand(words, nscope) { }
00567
00568
00569
00570
00571 virtual int IsA() const { return WORD_TREE_OPTIONAL; }
00572
00573 virtual int Optimize();
00574
00575 virtual int ContextSaveList(StringList& list) const;
00576
00577 virtual int ContextRestoreList(StringList& list);
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588 virtual int WalkNext();
00589
00590
00591
00592
00593 virtual int Seek(const WordKey& position);
00594
00595 virtual int Prepare(WordTreeArg& arg) {
00596 int ret;
00597 if((ret = permutation.Initialize(cursors_length, 0, 0, WORD_PERMUTE_PROXIMITY_TOGGLE)) != OK)
00598 return ret;
00599 permutation.Verbose(verbose);
00600 return WordTreeOperand::Prepare(arg);
00601 }
00602
00603 virtual void SetInfo();
00604
00605 virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_TOGGLE; }
00606
00607 virtual int UsePermutation() const { return 1; }
00608
00609 virtual int SetRealm(const WordKey& realm) {
00610 unsigned int i;
00611 int ret = OK;
00612 for(i = 0; i < cursors_length; i++) {
00613 if(permutation.Excluded(i)) {
00614
00615
00616
00617
00618
00619 if((ret = cursors[i]->UndefinedRealm()) != OK)
00620 return ret;
00621 } else {
00622 if((ret = cursors[i]->SetRealm(realm)) != OK)
00623 return ret;
00624 }
00625 }
00626 return OK;
00627 }
00628
00629 virtual int Count(unsigned int& count) const;
00630
00631
00632
00633
00634 virtual int AllOrNothing() const { return 0; }
00635
00636
00637
00638
00639
00640
00641
00642
00643
00644
00645
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659 int SearchCursorNear(WordTree& cursor, WordTree*& master, WordKey& constraint, int proximity);
00660
00661
00662
00663
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676 int SearchCursorNot(WordTree& cursor, WordKey& document);
00677
00678
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693 int SearchCursorAnd(WordTree& cursor, WordKey& document, WordExclude& permutation);
00694
00695
00696
00697
00698
00699
00700 virtual int AscendingFrequency();
00701
00702
00703
00704
00705
00706
00707
00708 virtual int StripNonExistent(unsigned int& stripped, unsigned int& killed_mandatory);
00709 };
00710
00711
00712
00713 class WordTreeOr : public WordTreeOperand {
00714 public:
00715 WordTreeOr(WordList* words, const char* nscope) : WordTreeOperand(words, nscope) { }
00716
00717
00718
00719
00720 virtual int IsA() const { return WORD_TREE_OR; }
00721
00722 virtual int Optimize();
00723
00724 virtual int ContextSaveList(StringList& list) const;
00725
00726 virtual int ContextRestoreList(StringList& list);
00727
00728 virtual void SetInfo();
00729
00730 virtual int WalkNext();
00731
00732 virtual int UsePermutation() const { return 0; }
00733
00734 virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_NO; }
00735 };
00736
00737
00738
00739 class WordTreeAnd : public WordTreeOptional {
00740 public:
00741 WordTreeAnd(WordList* words, const char* nscope) : WordTreeOptional(words, nscope) { }
00742
00743
00744
00745
00746 virtual int IsA() const { return WORD_TREE_AND; }
00747
00748 virtual int UsePermutation() const { return 0; }
00749
00750 virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_NO; }
00751
00752 virtual int AllOrNothing() const { return 1; }
00753 };
00754
00755
00756
00757 class WordTreeNear : public WordTreeOptional {
00758 public:
00759 WordTreeNear(WordList* words, const char* nscope) : WordTreeOptional(words, nscope) { }
00760
00761
00762
00763
00764 virtual int IsA() const { return WORD_TREE_NEAR; }
00765
00766 virtual int UsePermutation() const { return 0; }
00767
00768 virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_ONLY; }
00769
00770 virtual int AllOrNothing() const { return 1; }
00771
00772 virtual int Prepare(WordTreeArg& arg) {
00773 int proximity_saved = proximity ? proximity : arg._nproximity;
00774 int ret = WordTreeOptional::Prepare(arg);
00775 proximity = proximity_saved;
00776 return ret;
00777 }
00778
00779 };
00780
00781
00782
00783 class WordTreeMandatory : public WordTreeOperand {
00784 public:
00785 WordTreeMandatory(WordList* words, const char* nscope) : WordTreeOperand(words, nscope) { }
00786
00787
00788
00789
00790 virtual int IsA() const { return WORD_TREE_MANDATORY; }
00791 };
00792
00793
00794
00795 class WordTreeNot : public WordTreeOperand {
00796 public:
00797 WordTreeNot(WordList* words, const char* nscope) : WordTreeOperand(words, nscope) { }
00798
00799
00800
00801
00802 virtual int IsA() const { return WORD_TREE_NOT; }
00803 };
00804
00805 #endif