WordTree.cc

Go to the documentation of this file.
00001 //
00002 // Part of the ht://Dig package   <http://www.htdig.org/>
00003 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
00004 // For copyright details, see the file COPYING in your distribution
00005 // or the GNU General Public License version 2 or later
00006 // <http://www.gnu.org/copyleft/gpl.html>
00007 //
00008 // $Id: WordTree_8cc-source.html,v 1.1 2008/06/08 10:13:23 sebdiaz Exp $
00009 //
00010 #ifdef HAVE_CONFIG_H
00011 #include <config.h>
00012 #endif /* HAVE_CONFIG_H */
00013 
00014 #ifdef HAVE_UNISTD_H
00015 #include <unistd.h>
00016 #endif /* HAVE_UNISTD_H */
00017 
00018 #include <HtMaxMin.h>
00019 
00020 #include <WordTree.h>
00021 
00022 static char* operator_name[WORD_TREE_OP_SIZE] = {
00023   "",
00024   "or",
00025   "and",
00026   "near",
00027   "optional",
00028   "literal",
00029   "mandatory",
00030   "not",
00031   0
00032 };
00033 
00034 // ************************* WordTree implementation ****************
00035 
00036 int WordTree::WalkNextExclude(const WordKey& key)
00037 {
00038   int exclude = 0;
00039   if(results) {
00040     if(has_uniq) {
00041       exclude = results->UniqExists(key);
00042     }
00043     if(!exclude) {
00044       exclude = results->Exists(key);
00045     }
00046   }
00047   return exclude;
00048 }
00049 
00050 int WordTree::TopLevelOptimize(WordTree*& expr)
00051 {
00052   int verbose = expr->verbose;
00053   printf("Bef01\n");
00054   if(verbose) fprintf(stderr, "WordSearch::Optimize: non optimized expression %s\n", (char*)expr->Get());
00055   if(expr->Optimize() != OK)
00056     return NOTOK;
00057   printf("Bef02\n");
00058   /*
00059    * Single mandatory word equivalent to single word. 
00060    */
00061   if(expr->IsA() == WORD_TREE_MANDATORY) {
00062     WordTreeMandatory* mandatory = (WordTreeMandatory*)expr;
00063     if(mandatory->cursors_length != 1) {
00064       fprintf(stderr, "WordTree::TopLevelOptimize: unexpected cursor length == %d instead of 1\n", mandatory->cursors_length);
00065       return NOTOK;
00066     }
00067     expr = mandatory->cursors[0];
00068     mandatory->cursors_length = 0;
00069     delete mandatory;
00070   }
00071   printf("Bef03\n");
00072 
00073   /* 
00074    * Single forbiden word is something we don't want to handle.
00075    */
00076   if(expr->IsA() == WORD_TREE_NOT) {
00077     delete expr;
00078     expr = 0;
00079   }
00080   
00081   printf("Bef04\n");
00082   if(verbose) {
00083     if(expr) {
00084       fprintf(stderr, "WordTree::TopLevelOptimize: optimized expression %s\n", (char*)expr->Get());
00085     } else {
00086       fprintf(stderr, "WordTree::TopLevelOptimize: search expression is meaningless, discarded\n");
00087     }
00088   }
00089 
00090   printf("Bef05\n");
00091 
00092   if(expr && expr->IsA() != WORD_TREE_LITERAL && expr->key_semantic.HasRealm()) {
00093     if(expr->ClearRealm() != OK)
00094       return NOTOK;
00095   }
00096 
00097   printf("Bef06\n");
00098   return OK;
00099 }
00100 
00101 // ************************* WordTreeLiteral implementation ****************
00102 
00103 int WordTreeLiteral::WalkInit()
00104 {
00105   if(WordCursorOne::WalkInit() != OK)
00106     return NOTOK;
00107   if(!bottom_document.Empty())
00108     return Seek(bottom_document);
00109   else
00110     return OK;
00111 }
00112 
00113 int WordTreeLiteral::WalkRewind()
00114 {
00115   if(WordCursorOne::WalkRewind() != OK) return NOTOK;
00116   if(!bottom_document.Empty())
00117     return Seek(bottom_document);
00118   return OK;
00119 }
00120 
00121 int WordTreeLiteral::WalkNext()
00122 {
00123   int ret;
00124   int too_low = 0;
00125 
00126   do {
00127     ret = WordCursorOne::WalkNext();
00128     //
00129     // If the returned document is before the lowest document allowed,
00130     // Seek and try again.
00131     //
00132     if(ret == OK &&
00133        !bottom_document.Empty() &&
00134        key_semantic.DocumentCompare(bottom_document, GetDocument()) > 0) {
00135       WordKey patch = bottom_document;
00136       key_semantic.RealmCopy(GetFound().Key(), patch);
00137       ret = Seek(patch);
00138       too_low = 1;
00139     } else {
00140       too_low = 0;
00141     }
00142   } while(ret == OK && too_low);
00143 
00144   if(verbose > 3) fprintf(stderr, "WordTreeLiteral::WalkNext: reached %s\n", (char*)GetDocument().Get());
00145 
00146   if(ret == OK && !top_document.Empty()) {
00147     if(key_semantic.DocumentCompare(top_document, GetDocument()) <= 0)
00148       ret = WORD_WALK_ATEND|WORD_WALK_ATEND_NOMATCH;
00149   }
00150 
00151   return ret;
00152 }
00153 
00154 int WordTreeLiteral::Seek(const WordKey& position)
00155 {
00156   return WordCursorOne::Seek((bottom_document.Empty() || key_semantic.DocumentCompare(bottom_document, position) <= 0) ? position : bottom_document);
00157 }
00158 
00159 
00160 int WordTreeLiteral::Get(String& bufferout) const
00161 {
00162 
00163   if(scope.empty())
00164     bufferout << search;
00165   else
00166     bufferout << "( " << operator_name[IsA()] << " \"" << scope << "\" " << search << " )";
00167   return OK;
00168 }
00169 
00170 
00171 // ************************* WordTreeOperand implementation ****************
00172 
00173 static char* ret2str(int ret)
00174 {
00175   if(ret == WORD_WALK_REDO)
00176     return "REDO";
00177 
00178   if(ret == WORD_WALK_RESTART)
00179     return "RESTART";
00180 
00181   if(ret == WORD_WALK_NEXT)
00182     return "NEXT";
00183 
00184   if(ret == OK)
00185     return "OK";
00186 
00187   if(ret == NOTOK)
00188     return "NOTOK";
00189 
00190   if(ret == WORD_WALK_ATEND)
00191     return "ATEND";
00192 
00193   if(ret == WORD_WALK_ATEND|WORD_WALK_ATEND_NOMATCH)
00194     return "ATEND|NOMATCH";
00195 
00196   return "???";
00197 }
00198 
00199 WordTreeOperand::~WordTreeOperand()
00200 {
00201   if(cursors) {
00202     unsigned int i;
00203     for(i = 0; i < cursors_length; i++)
00204       delete cursors[i];
00205     free(cursors);
00206   }
00207 }
00208 
00209 int 
00210 WordTreeOperand::Optimize()
00211 {
00212   //
00213   // Apply to each cursor
00214   //
00215   unsigned int i;
00216   for(i = 0; i < cursors_length; i++)
00217     if(cursors[i]->Optimize() == NOTOK)
00218       return NOTOK;
00219   return OK;
00220 } 
00221 
00222 int WordTreeOperand::OptimizeOr(int proximity)
00223 {
00224   unsigned int ignore = 0;
00225   unsigned int ignore_mask = 0;
00226   unsigned int i;
00227   for(i = 0; i < cursors_length; i++) {
00228     int reduce;
00229     //
00230     // Set ignore & ignore_mask if cursor is NOT or MANDATORY
00231     //
00232     switch(cursors[i]->IsA()) {
00233     case WORD_TREE_MANDATORY:
00234       ignore |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i));
00235       reduce = 1;
00236       break;
00237     case WORD_TREE_NOT:
00238       ignore |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i));
00239       ignore_mask |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i));
00240       reduce = 1;
00241       break;
00242     default:
00243       reduce = 0;
00244       break;
00245     }
00246     //
00247     // Replace the NOT or MANDATORY node by its only child
00248     //
00249     if(reduce) {
00250       WordTreeOperand* old = (WordTreeOperand*)cursors[i];
00251       cursors[i] = old->cursors[0];
00252       old->cursors[0] = 0;
00253       old->cursors_length--;
00254       if(old->cursors_length > 0) {
00255         fprintf(stderr, "WordTreeOperand::OptimizeOr: too many cursors\n");
00256         return NOTOK;
00257       }
00258       delete old;
00259     }
00260   }
00261   return permutation.Initialize(cursors_length, ignore, ignore_mask, proximity);
00262 }
00263 
00264 int 
00265 WordTreeOperand::WalkInit()
00266 {
00267   unsigned int i;
00268   int ret = WORD_WALK_ATEND;
00269   for(i = 0; i < cursors_length; i++)
00270     if((ret = cursors[i]->WalkInit()) != OK)
00271       return ret;
00272   return (status = ret);
00273 }
00274 
00275 int 
00276 WordTreeOperand::WalkRewind()
00277 {
00278   unsigned int i;
00279   int ret = OK;
00280   for(i = 0; i < cursors_length; i++)
00281     if((ret = cursors[i]->WalkRewind()) != OK)
00282       return ret;
00283   status = OK;
00284   key_semantic.DocumentClear(pos);
00285   cursor_get_flags = DB_SET_RANGE;
00286   found.Clear();
00287   return ret;
00288 }
00289 
00290 int 
00291 WordTreeOperand::WalkFinish()
00292 {
00293   unsigned int i;
00294   int ret = OK;
00295   for(i = 0; i < cursors_length; i++)
00296     if((ret = cursors[i]->WalkFinish()) != OK)
00297       return ret;
00298   return ret;
00299 }
00300 
00301 int 
00302 WordTreeOperand::Seek(const WordKey& patch)
00303 {
00304   pos = patch;
00305   cursor_get_flags = DB_SET_RANGE;
00306 
00307   unsigned int i;
00308   int ret = OK;
00309   for(i = 0; i < cursors_length; i++)
00310     if((ret = cursors[i]->Seek(patch)) != OK &&
00311        !cursors[i]->IsAtEnd())
00312       return ret;
00313   status = OK;
00314   return OK;
00315 }
00316 
00317 int WordTreeOperand::Get(String& bufferout) const
00318 {
00319   bufferout << "( " << operator_name[IsA()] << " \"" << scope << "\" ";
00320   unsigned int i;
00321   for(i = 0; i < cursors_length; i++)
00322     bufferout << cursors[i]->Get() << " ";
00323   bufferout << " )";
00324   return OK;
00325 }
00326 
00327 // ************************* WordTreeOptional implementation ****************
00328 
00329 int WordTreeOptional::Optimize()
00330 {
00331   int ret;
00332   if((ret = WordTreeOperand::Optimize()) != OK)
00333     return ret;
00334 
00335   if(UseProximity() != WORD_PERMUTE_PROXIMITY_ONLY) {
00336     if((ret = AscendingFrequency()) != OK)
00337       return ret;
00338   }
00339 
00340   unsigned int stripped;
00341   unsigned int killed_mandatory;
00342   if((ret = StripNonExistent(stripped, killed_mandatory)) != OK)
00343     return ret;
00344 
00345   if(killed_mandatory || (AllOrNothing() && stripped)) {
00346     //
00347     // One word is missing and everything is lost,
00348     // Just kill the remaining cursors.
00349     //
00350     unsigned int i;
00351     for(i = 0; i < cursors_length; i++)
00352       delete cursors[i];
00353     cursors_length = 0;
00354 
00355     return OK;
00356   } else {
00357     return OptimizeOr(UseProximity());
00358   }
00359 }
00360 
00361 int WordTreeOptional::ContextSaveList(StringList& list) const
00362 {
00363   int ret;
00364   if((ret = WordTreeOperand::ContextSaveList(list)) != OK)
00365     return ret;
00366 
00367   if(UsePermutation()) {
00368     String* buffer = new String();
00369     permutation.Get(*buffer);
00370     
00371     list.Add(buffer);
00372   }
00373    
00374   {
00375     String* buffer = new String();
00376     if((ret = WordCursorOne::ContextSave(*buffer)) != OK)
00377       return ret;
00378 
00379     list.Add(buffer);
00380   }
00381 
00382   return OK;
00383 }
00384 
00385 int WordTreeOptional::ContextRestoreList(StringList& list)
00386 {
00387   int ret;
00388   if((ret = WordTreeOperand::ContextRestoreList(list)) != OK)
00389     return ret;
00390 
00391   if(UsePermutation()) {
00392     char* buffer = list[0];
00393     if((ret = permutation.Set(buffer)) != OK)
00394       return ret;
00395     list.Remove(0);
00396   }
00397 
00398   {
00399     char* buffer = list[0];
00400     if(!buffer) return NOTOK;
00401     WordKey key(words->GetContext(), buffer);
00402     if((ret = Seek(key)) != OK)
00403       return ret;
00404     cursor_get_flags = DB_NEXT;
00405 
00406     list.Remove(0);
00407   }
00408 
00409   return OK;
00410 }
00411 
00412 int WordTreeOptional::WalkNext()
00413 {
00414   WordKey& constraint = pos;
00415   //
00416   // Set constraint with all 0
00417   //
00418   if(constraint.Empty()) {
00419     if(verbose) fprintf(stderr, "  WordTreeOptional::WalkNext: required position or document is reset, i.e. before first possible entry\n");
00420     key_semantic.DocumentClear(constraint);
00421   }
00422 
00423   int ret = OK;
00424   //
00425   // Advance cursors so that next call fetches another constraint
00426   //
00427   if(cursor_get_flags == DB_NEXT)
00428     key_semantic.DocumentNext(constraint, has_uniq);
00429     
00430   if((ret = Seek(constraint)) != OK)
00431     return ret;
00432 
00433   int near = permutation.Proximity();
00434   WordTree* first = 0;
00435   for(unsigned int i = 0; i < cursors_length;) {
00436     int excluded = permutation.Excluded(i);
00437     WordTree& cursor = *(cursors[i]);
00438 
00439     if(excluded && !permutation.Ignored(i)) {
00440       if(verbose) fprintf(stderr, "\n  WordTreeOptional::WalkNext: looking for %s (ignored)\n", (char*)cursor.search);
00441       i++;
00442       continue;
00443     }
00444 
00445     near = permutation.Proximity();
00446     if(verbose) fprintf(stderr, "\n  WordTreeOptional::WalkNext: looking for %s (excluded = %s, proximity = %s)\n", (char*)cursor.search, (excluded ? "yes" : "no"), (near ? "yes" : "no" ));
00447 
00448     int ret;
00449     if(excluded) {
00450       ret = SearchCursorNot(cursor, constraint);
00451       if(verbose) fprintf(stderr, "  WordTreeOptional::WalkNext: Not -> %s\n", ret2str(ret));
00452     } else {
00453       if(near) {
00454         ret = SearchCursorNear(cursor, first, constraint, proximity);
00455         if(verbose) fprintf(stderr, "  WordTreeOptional::WalkNext: Near -> %s\n", ret2str(ret));
00456       } else {
00457         ret = SearchCursorAnd(cursor, constraint, permutation);
00458         if(verbose) fprintf(stderr, "  WordTreeOptional::WalkNext: And -> %s\n", ret2str(ret));
00459       }
00460     }
00461 
00462     switch(ret & WORD_WALK_RESULT_MASK) {
00463     case WORD_WALK_ATEND:
00464       //
00465       // If a realm exists between the word and the document and the
00466       // cursor did not hit the end of the index.
00467       //
00468       if(key_semantic.HasRealm()) {
00469         if(ret & WORD_WALK_ATEND_NOMATCH) {
00470           //
00471           // If the cursor is still on the same word
00472           //
00473           if(cursor.GetFound().Key().Get(WORD_KEY_WORD) == cursor.GetSearch().Get(WORD_KEY_WORD)) {
00474             //
00475             // Change the realm of all cursors
00476             //
00477             if(SetRealm(cursor.GetFound().Key()) != OK)
00478               return NOTOK;
00479             //
00480             // And restart the search in this realm
00481             //
00482             if(WalkRewind() != OK)
00483               return NOTOK;
00484             first = 0;
00485             i = 0;
00486             break;
00487           }
00488         }
00489       }
00490       if(UsePermutation()) {
00491         //
00492         // The search is over with this permutation, try another one.
00493         //
00494         if(verbose) fprintf(stderr, "  WordTreeOptional::WalkNext: try next proximity/exclusion permutation\n");
00495         switch(permutation.Next()) {
00496           //
00497           // No permutations left, the end
00498           //
00499         case WORD_PERMUTE_END:
00500           if(verbose) fprintf(stderr, "\nWordTreeOptional::WalkNext: ATEND\n");
00501           return (status = WORD_WALK_ATEND);
00502           break;
00503 
00504           //
00505           // Sart over with this permutation
00506           //
00507         case WORD_PERMUTE_OK:
00508           if(key_semantic.HasRealm() && ClearRealm() != OK)
00509             return NOTOK;
00510           if(WalkRewind() != OK)
00511             return NOTOK;
00512           break;
00513         }
00514         first = 0;
00515         i = 0;
00516       } else {
00517         if(verbose) fprintf(stderr, "\nWordTreeOptional::WalkNext: ATEND\n");
00518         return (status = WORD_WALK_ATEND);
00519       }
00520       break;
00521     case WORD_WALK_REDO:
00522       break;
00523     case WORD_WALK_RESTART:
00524       first = 0;
00525       i = 0;
00526       break;
00527     case WORD_WALK_NEXT:
00528       i++;
00529       break;
00530     case NOTOK:
00531     default:
00532       if(verbose) fprintf(stderr, "\nWordTreeOptional::WalkNext: %s\n", ret2str(ret));
00533       return ret;
00534       break;
00535     }
00536   }
00537 
00538   cursor_get_flags = DB_NEXT;
00539 
00540   SetInfo();
00541     
00542   //
00543   // Save possible result, i.e. first non excluded cursor
00544   //
00545   for(unsigned int i = 0; i < cursors_length; i++) {
00546     WordTree& cursor = *(cursors[i]);
00547     if(!permutation.Excluded(i)) {
00548       found.Key() = cursor.GetFound().Key();
00549       break;
00550     }
00551   }
00552 
00553   if(verbose) fprintf(stderr, "\nWordTreeOptional::WalkNext: OK\n");
00554   return ret;
00555 }
00556 
00557 int WordTreeOptional::Seek(const WordKey& position)
00558 {
00559   pos = position;
00560   cursor_get_flags = DB_SET_RANGE;
00561   status = OK;
00562 
00563   unsigned int i;
00564   for(i = 0; i < cursors_length; i++) {
00565     if(!permutation.Excluded(i)) {
00566       WordTree& cursor = *(cursors[i]);
00567       return cursor.Seek(position);
00568     }
00569   }
00570 
00571   fprintf(stderr, "  WordTreeOptional::Seek: failed\n");
00572   return NOTOK;
00573 }  
00574 
00575 
00576 void WordTreeOptional::SetInfo()
00577 {
00578   unsigned int i;
00579   for(i = 0; i < cursors_length; i++)
00580     cursors[i]->SetInfo();
00581 
00582   info.trunc();
00583 
00584   for(i = 0; i < cursors_length; i++) {
00585     WordTree& cursor = *(cursors[i]);
00586 
00587     if(!permutation.Excluded(i))
00588       info << cursor.info << " ";
00589   }
00590 
00591   info << (permutation.Proximity() ? "proximity" : "");
00592 }
00593 
00594 int WordTreeOptional::Count(unsigned int& count) const
00595 {
00596   unsigned int count_and = 0;
00597 
00598   count = 0;
00599   for(unsigned int i = 0; i < cursors_length; i++) {
00600     unsigned int cursor_count;
00601     cursors[i]->Count(cursor_count);
00602     if(permutation.Ignored(i) && !permutation.Excluded(i)) {
00603       count_and = count_and ? HtMIN(count_and, cursor_count) : cursor_count;
00604     } else {
00605       count += cursor_count;
00606     }
00607   }
00608 
00609   count = count_and ? count_and : count;
00610 
00611   return OK;
00612 }
00613 
00614 int WordTreeOptional::SearchCursorNear(WordTree& cursor, WordTree*& master, WordKey& constraint, int proximity)
00615 {
00616   int is_master = master == 0 || master == &cursor;
00617   if(master == 0) master = &cursor;
00618   const WordKey& masterKey = master->GetFound().Key();
00619 
00620   int direction = key_semantic.LocationCompare(constraint, cursor.GetFound().Key(), proximity);
00621   if(verbose) {
00622     fprintf(stderr, "  WordTreeOptional::SearchCursorNear: required position is %s\n", (char*)(constraint.Get()));
00623     fprintf(stderr, "  WordTreeOptional::SearchCursorNear: cursor %s is %s\n",
00624             (cursor.IsAtEnd() ? "" : (char*)(cursor.GetFound().Key().Get())),
00625             (cursor.IsAtEnd() ? "at end" : ((direction == 0 ? "ok" : (direction < 0 ? "after required position" : "before required position")))));
00626   }
00627 
00628   //
00629   // If the cursor is in the authorized locations, consider
00630   // next cursor
00631   //
00632   if(direction == 0) {
00633     //
00634     // master cursor makes the rules for location : its location
00635     // is the base to calculate other words mandatory loacations.
00636     //
00637     if(is_master)
00638       key_semantic.LocationSet(cursor.GetFound().Key(), constraint);
00639     //
00640     // Fix location constraint to accomodate proximity tolerance.
00641     //
00642     key_semantic.LocationNearLowest(constraint, proximity);
00643     return WORD_WALK_NEXT;
00644 
00645     //
00646     // If current location is after cursor location
00647     //
00648   } else if(direction > 0) {
00649     //
00650     // Move the cursor up to the location.
00651     //
00652     cursor.Seek(constraint);
00653     if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorNear: seek to required position\n");
00654     int ret;
00655     if((ret = cursor.WalkNext()) == OK) {
00656       //
00657       // Remove the location constraint for the master word
00658       // so that it matches and then enforce location for other
00659       // keys.
00660       //
00661       if(is_master) {
00662         if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorNear: required position is copied from master cursor\n");
00663         key_semantic.Location2Document(constraint);
00664       }
00665       //
00666       // Reconsider the situation for this cursor
00667       //
00668       return WORD_WALK_REDO;
00669     } else {
00670       return ret;
00671     }
00672 
00673     //
00674     // If current location is lower than cursor location,
00675     // meaning that the cursor found no match for the current
00676     // location.
00677     //
00678   } else if(direction < 0) {
00679     //
00680     // The cursor document becomes the current document. 
00681     // The master cursor is forced to catch up.
00682     //
00683     if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorNear: required position is copied from cursor\n");
00684     key_semantic.DocumentSet(cursor.GetDocument(), constraint);
00685     //
00686     // It is possible that this cursor document is the same
00687     // as the master cursor document (if this cursor hit in the
00688     // same document but a higher location). In this case we must
00689     // increase the location of the master cursor otherwise it will
00690     // match without moving and loop forever.
00691     //
00692     if(!is_master && key_semantic.DocumentCompare(masterKey, constraint) == 0) {
00693       key_semantic.LocationSet(masterKey, constraint);
00694       key_semantic.LocationNext(constraint);
00695     }
00696     //
00697     // Since the current location changed, start over.
00698     //
00699     return WORD_WALK_RESTART;
00700   } else {
00701     fprintf(stderr, "  WordTreeOptional::WordCursorNear: reached unreachable statement\n");
00702     return NOTOK;
00703   }
00704   return NOTOK;
00705 }
00706 
00707 int WordTreeOptional::SearchCursorNot(WordTree& cursor, WordKey& document)
00708 {
00709   if(key_semantic.HasRealm()) {
00710     //
00711     // Since there is a realm we cannot rely on the sequential ordering
00712     // of the entries for a given document and we have to start over
00713     // each time.
00714     //
00715     WordKey search = cursor.GetSearch();
00716     key_semantic.DocumentCopy(document, cursor.GetSearch());
00717     cursor.UndefinedRealm();
00718     cursor.WalkRewind();
00719     cursor.Seek(document);
00720     if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorNot: seek to required document\n");
00721     if(cursor.WalkNext() != OK && !cursor.IsAtEnd())
00722       return NOTOK;
00723     int ret;
00724     if(cursor.IsAtEnd()) {
00725       ret = WORD_WALK_NEXT;
00726     } else {
00727       if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorNot: required document incremented\n");
00728       //
00729       // The cursor does not give any hint on a possible
00730       // next document, just go to the next possible one.
00731       //
00732       key_semantic.DocumentNext(document, has_uniq);
00733       //
00734       // Since the current document changed, start over.
00735       //
00736       ret = WORD_WALK_RESTART;
00737     }
00738     cursor.GetSearch() = search;
00739     cursor.WalkRewind();
00740     return ret;
00741   } else {
00742     int direction = key_semantic.DocumentCompare(document, cursor.GetFound().Key());
00743     if(verbose) {
00744       fprintf(stderr, "  WordTreeOptional::SearchCursorNot: required document is %s\n", (char*)(document.Get()));
00745       fprintf(stderr, "  WordTreeOptional::SearchCursorNot: cursor %s is %s\n",
00746               (cursor.IsAtEnd() ? "" : (char*)(cursor.GetFound().Key().Get())),
00747               (cursor.IsAtEnd() ? "at end" : ((direction == 0 ? "ok" : (direction < 0 ? "after required document" : "before required document")))));
00748     }
00749 
00750     //
00751     // If the cursor is after the current document
00752     // (being at the end of walk is being after all documents).
00753     //
00754     // Means that the cursor is positioned in an acceptable document
00755     // and proceed to the next cursor.
00756     //
00757     if(direction < 0 || cursor.IsAtEnd()) {
00758       if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorNot: cursor does not match in required document\n");
00759       return WORD_WALK_NEXT;
00760 
00761       //
00762       // If the cursor is before current document
00763       //
00764     } else if(direction > 0) {
00765       //
00766       // Move the cursor up to the document
00767       //
00768       cursor.Seek(document);
00769       if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorNot: seek to required document\n");
00770       if(cursor.WalkNext() != OK && !cursor.IsAtEnd())
00771         return NOTOK;
00772       //
00773       // It is expected in this case that the cursor has moved after
00774       // the current document and another visit in the loop will
00775       // tell us.
00776       //
00777       return WORD_WALK_REDO;
00778 
00779       //
00780       // If the cursor matches the current document.
00781       //
00782       // Means that the current document is not a possible match
00783       // since it is pointed by this cursor.
00784       //
00785     } else if(direction == 0) {
00786       if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorNot: required document incremented\n");
00787       //
00788       // The cursor does not give any hint on a possible
00789       // next document, just go to the next possible one.
00790       //
00791       key_semantic.DocumentNext(document, has_uniq);
00792       //
00793       // Since the current document changed, start over.
00794       //
00795       return WORD_WALK_RESTART;
00796     } else {
00797       fprintf(stderr, "  WordTreeOptional::WordCursorNot: reached unreachable statement\n");
00798       return NOTOK;
00799     }
00800   }
00801   return NOTOK;
00802 }
00803 
00804 int WordTreeOptional::SearchCursorAnd(WordTree& cursor, WordKey& document, WordExclude& permutation)
00805 {
00806   int direction = key_semantic.DocumentCompare(document, cursor.GetFound().Key());
00807   if(verbose) {
00808     fprintf(stderr, "  WordTreeOptional::SearchCursorAnd: required document is %s\n", (char*)(document.Get()));
00809     fprintf(stderr, "  WordTreeOptional::SearchCursorAnd: cursor %s is %s\n",
00810             (cursor.IsAtEnd() ? "" : (char*)(cursor.GetFound().Key().Get())),
00811             (cursor.IsAtEnd() ? "at end" : ((direction == 0 ? "ok" : (direction < 0 ? "after required document" : "before required document")))));
00812   }
00813 
00814   //
00815   // If the cursor is in the current document.
00816   //
00817   // Means that the cursor is positioned in an acceptable document
00818   // and proceed to the next cursor.
00819   //
00820   if(direction == 0) {
00821     return WORD_WALK_NEXT;
00822 
00823     //
00824     // If the cursor is before current document
00825     //
00826   } else if(direction > 0) {
00827     //
00828     // Move the cursor up to the document
00829     //
00830     if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorAnd: seek to required document\n");
00831     cursor.Seek(document);
00832     int ret;
00833     if((ret = cursor.WalkNext()) == OK)
00834       return WORD_WALK_REDO;
00835     else
00836       return ret;
00837 
00838     //
00839     // If the cursor is after current document.
00840     //
00841     // Means the the current document is not a possible match
00842     // since it will never reach it because it's already
00843     // after it.
00844     //
00845   } else if(direction < 0) {
00846     //
00847     // The cursor document becomes the current document.
00848     //
00849     if(verbose) fprintf(stderr, "  WordTreeOptional::SearchCursorAnd: required document is copied from cursor\n");
00850     key_semantic.DocumentSet(cursor.GetDocument(), document);
00851 
00852     //
00853     // Since the current document changed, start over.
00854     //
00855     return WORD_WALK_RESTART;
00856   } else {
00857     fprintf(stderr, "  WordTreeOptional::WordCursorAnd: reached unreachable statement\n");
00858     return NOTOK;
00859   }
00860   return NOTOK;
00861 }
00862 
00863 //
00864 // Helper class for AscendingFrequency method
00865 //
00866 class WordSort {
00867 public:
00868   unsigned int frequency;
00869   WordTree *cursor;
00870 };
00871 
00872 //
00873 // Helper function for AscendingFrequency method
00874 //
00875 static int ascending_frequency(const void *a, const void *b)
00876 {
00877   const WordSort& a_cursor = *(WordSort*)a;
00878   const WordSort& b_cursor = *(WordSort*)b;
00879 
00880   return a_cursor.frequency - b_cursor.frequency;
00881 }
00882 
00883 int WordTreeOptional::AscendingFrequency()
00884 {
00885   //
00886   // Reorder cursors
00887   //
00888   WordSort *tmp = new WordSort[cursors_length];
00889 
00890   memset((char*)tmp, '\0', sizeof(WordSort[cursors_length]));
00891 
00892   unsigned int i;
00893   for(i = 0; i < cursors_length; i++) {
00894     unsigned int frequency;
00895     if(cursors[i]->Noccurrence(frequency) != OK) {
00896       delete [] tmp;
00897       return NOTOK;
00898     }
00899     if(verbose > 2) fprintf(stderr, "  WordTreeOptional::AscendingFrequency: %s occurs %d times\n", (char*)cursors[i]->search, frequency);
00900     tmp[i].frequency = frequency;
00901     tmp[i].cursor = cursors[i];
00902   }
00903 
00904   memset((char*)cursors, '\0', sizeof(WordTree*) * cursors_length);
00905 
00906   qsort((void *)tmp, cursors_length, sizeof(WordSort), &ascending_frequency);
00907 
00908   for(i = 0; i < cursors_length; i++)
00909     cursors[i] = tmp[i].cursor;
00910 
00911   delete [] tmp;
00912   return OK;
00913 }
00914 
00915 int WordTreeOptional::StripNonExistent(unsigned int& stripped, unsigned int& killed_mandatory)
00916 {
00917   stripped = 0;
00918   killed_mandatory = 0;
00919 
00920   WordTree** tmp = new WordTree*[cursors_length];
00921   memset((char*)tmp, '\0', sizeof(WordTree*[cursors_length]));
00922 
00923   unsigned int from;
00924   unsigned int to;
00925 
00926   for(to = from = 0; from < cursors_length; from++) {
00927     unsigned int frequency;
00928     if(cursors[from]->Noccurrence(frequency) != OK) {
00929       delete [] tmp;
00930       return NOTOK;
00931     }
00932 
00933     if(verbose > 2) fprintf(stderr, "  WordTreeOptional::StripNonExistent: %s occurs %d times\n", (char*)cursors[from]->search, frequency);
00934     if(frequency > 0) {
00935       tmp[to++] = cursors[from];
00936     } else {
00937       if(cursors[from]->IsA() == WORD_TREE_MANDATORY)
00938         killed_mandatory++;
00939       delete cursors[from];
00940       stripped++;
00941     }
00942   }
00943 
00944   memset((char*)cursors, '\0', sizeof(WordTree*) * cursors_length);
00945   
00946   cursors_length = to;
00947   unsigned int i;
00948   for(i = 0; i < cursors_length; i++)
00949     cursors[i] = tmp[i];
00950 
00951   delete [] tmp;
00952 
00953   return OK;
00954 }
00955 
00956 // ************************* WordTreeOr implementation ********************
00957 
00958 int WordTreeOr::Optimize()
00959 {
00960   int ret;
00961   if((ret = WordTreeOperand::Optimize()) != OK)
00962     return ret;
00963 
00964   if((ret = AscendingFrequency()) != OK)
00965     return ret;
00966 
00967   unsigned int stripped;
00968   unsigned int killed_mandatory;
00969   if((ret = StripNonExistent(stripped, killed_mandatory)) != OK)
00970     return ret;
00971 
00972   return OptimizeOr(WORD_PERMUTE_PROXIMITY_NO);
00973 }
00974 
00975 int WordTreeOr::ContextSaveList(StringList& list) const
00976 {
00977   int ret;
00978   if((ret = WordTreeOperand::ContextSaveList(list)) != OK)
00979     return ret;
00980 
00981   {
00982     String* buffer = new String();
00983     permutation.Get(*buffer);
00984     
00985     list.Add(buffer);
00986   }
00987    
00988   {
00989     String* buffer = new String();
00990     if((ret = WordCursorOne::ContextSave(*buffer)) != OK)
00991       return ret;
00992 
00993     list.Add(buffer);
00994   }
00995 
00996   return OK;
00997 }
00998 
00999 int WordTreeOr::ContextRestoreList(StringList& list)
01000 {
01001   int ret;
01002   if((ret = WordTreeOperand::ContextRestoreList(list)) != OK)
01003     return ret;
01004 
01005   {
01006     char* buffer = list[0];
01007     if((ret = permutation.Set(buffer)) != OK)
01008       return ret;
01009     list.Remove(0);
01010   }
01011 
01012   {
01013     char* buffer = list[0];
01014     if(!buffer) return NOTOK;
01015     WordKey key(words->GetContext(), buffer);
01016     if((ret = Seek(key)) != OK)
01017       return ret;
01018     cursor_get_flags = DB_NEXT;
01019 
01020     list.Remove(0);
01021   }
01022 
01023   return OK;
01024 }
01025 
01026 void WordTreeOr::SetInfo()
01027 {
01028   unsigned int i;
01029   for(i = 0; i < cursors_length; i++)
01030     cursors[i]->SetInfo();
01031 
01032   info.trunc();
01033 
01034   for(i = 0; i < cursors_length; i++) {
01035     WordTree& cursor = *(cursors[i]);
01036 
01037     if(!permutation.Excluded(i) &&
01038        !cursor.IsAtEnd() &&
01039        key_semantic.DocumentCompare(cursor.GetFound().Key(), GetFound().Key()) == 0) {
01040       info << cursor.info << " ";
01041     }
01042   }
01043 }
01044 
01045 int WordTreeOr::WalkNext()
01046 {
01047   WordKey& constraint = pos;
01048   //
01049   // Set constraint with all 0
01050   //
01051   if(constraint.Empty())
01052     key_semantic.DocumentClear(constraint);
01053   
01054   WordKey candidate(words->GetContext());
01055   int match_ok;
01056   do {
01057       int ret;
01058       unsigned int i;
01059       candidate.Clear();
01060       //
01061       // Advance cursors so that next call fetches another constraint
01062       //
01063       if(cursor_get_flags == DB_NEXT)
01064           key_semantic.DocumentNext(constraint, has_uniq);
01065     
01066       if((ret = Seek(constraint)) != OK)
01067           return ret;
01068 
01069       match_ok = 1;
01070       //
01071       // All non excluded cursors are about to move
01072       // at or beyond constraint. Search for the one (candidate) that
01073       // is located at the lowest location beyond the constraint.
01074       //
01075       for(i = 0; i < cursors_length; i++) {
01076           if(permutation.Excluded(i))
01077               continue;
01078           WordTree& cursor = *(cursors[i]);
01079 
01080           switch((ret = cursor.WalkNext()) & WORD_WALK_RESULT_MASK) {
01081           case WORD_WALK_ATEND:
01082               //
01083               // Constraint is after all matches for this cursor
01084               //
01085               break;
01086           case OK:
01087               //
01088               // If candidate is not set or current cursor is before
01089               // the current candidate, the curent cursor document becomes
01090               // the candidate.
01091               //
01092               if(candidate.Empty() ||
01093                  key_semantic.DocumentCompare(candidate, cursor.GetFound().Key()) > 0) {
01094                   key_semantic.DocumentSet(cursor.GetDocument(), candidate);
01095               }
01096               break;
01097           default:
01098               return ret;
01099               break;
01100           }
01101       }
01102 
01103       //
01104       // No candidate ? It's the end of the match list.
01105       //
01106       if(candidate.Empty())
01107           return WORD_WALK_ATEND;
01108 
01109       found.Key() = candidate;
01110 
01111       SetInfo();
01112 
01113       if(permutation.ExcludedCount() > 0) {
01114         if((ret = Seek(candidate)) != OK)
01115           return ret;
01116 
01117         //
01118         // Restart loop if candidate matches an excluded cursor.
01119         //
01120         for(i = 0; i < cursors_length && match_ok; i++) {
01121           if(!permutation.Excluded(i))
01122             continue;
01123           WordTree& cursor = *(cursors[i]);
01124 
01125           switch((ret = cursor.WalkNext()) & WORD_WALK_RESULT_MASK) {
01126           case WORD_WALK_ATEND:
01127             //
01128             // This excluded cursor can't match the candidate, fine.
01129             //
01130             break;
01131           case OK:
01132             //
01133             // This excluded cursor matches candidate therefore it's
01134             // not a valid candidate. Restart search with this candidate
01135             // as the constraint.
01136             //
01137             if(key_semantic.DocumentCompare(candidate, cursor.GetFound().Key()) == 0) {
01138               constraint = candidate;
01139               match_ok = 0;
01140             }
01141             break;
01142           default:
01143             return ret;
01144             break;
01145           }
01146           
01147         }
01148       }
01149 
01150       cursor_get_flags = DB_NEXT;
01151 
01152   } while(!match_ok);
01153 
01154   constraint = candidate;
01155 
01156   return OK;
01157 }
01158 
01159 // ************************* WordTreeAnd implementation ********************
01160 
01161 // ************************* WordTreeNear implementation ********************
01162 
01163 // ************************* WordTreeMandatory implementation ***************
01164 
01165 // ************************* WordTreeNot implementation ***************
01166 

Generated on Sun Jun 8 10:56:40 2008 for GNUmifluz by  doxygen 1.5.5