00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifdef HAVE_CONFIG_H
00011 #include <config.h>
00012 #endif
00013
00014 #ifdef HAVE_UNISTD_H
00015 #include <unistd.h>
00016 #endif
00017
00018 #include <HtMaxMin.h>
00019
00020 #include <WordTree.h>
00021
00022 static char* operator_name[WORD_TREE_OP_SIZE] = {
00023 "",
00024 "or",
00025 "and",
00026 "near",
00027 "optional",
00028 "literal",
00029 "mandatory",
00030 "not",
00031 0
00032 };
00033
00034
00035
00036 int WordTree::WalkNextExclude(const WordKey& key)
00037 {
00038 int exclude = 0;
00039 if(results) {
00040 if(has_uniq) {
00041 exclude = results->UniqExists(key);
00042 }
00043 if(!exclude) {
00044 exclude = results->Exists(key);
00045 }
00046 }
00047 return exclude;
00048 }
00049
00050 int WordTree::TopLevelOptimize(WordTree*& expr)
00051 {
00052 int verbose = expr->verbose;
00053 printf("Bef01\n");
00054 if(verbose) fprintf(stderr, "WordSearch::Optimize: non optimized expression %s\n", (char*)expr->Get());
00055 if(expr->Optimize() != OK)
00056 return NOTOK;
00057 printf("Bef02\n");
00058
00059
00060
00061 if(expr->IsA() == WORD_TREE_MANDATORY) {
00062 WordTreeMandatory* mandatory = (WordTreeMandatory*)expr;
00063 if(mandatory->cursors_length != 1) {
00064 fprintf(stderr, "WordTree::TopLevelOptimize: unexpected cursor length == %d instead of 1\n", mandatory->cursors_length);
00065 return NOTOK;
00066 }
00067 expr = mandatory->cursors[0];
00068 mandatory->cursors_length = 0;
00069 delete mandatory;
00070 }
00071 printf("Bef03\n");
00072
00073
00074
00075
00076 if(expr->IsA() == WORD_TREE_NOT) {
00077 delete expr;
00078 expr = 0;
00079 }
00080
00081 printf("Bef04\n");
00082 if(verbose) {
00083 if(expr) {
00084 fprintf(stderr, "WordTree::TopLevelOptimize: optimized expression %s\n", (char*)expr->Get());
00085 } else {
00086 fprintf(stderr, "WordTree::TopLevelOptimize: search expression is meaningless, discarded\n");
00087 }
00088 }
00089
00090 printf("Bef05\n");
00091
00092 if(expr && expr->IsA() != WORD_TREE_LITERAL && expr->key_semantic.HasRealm()) {
00093 if(expr->ClearRealm() != OK)
00094 return NOTOK;
00095 }
00096
00097 printf("Bef06\n");
00098 return OK;
00099 }
00100
00101
00102
00103 int WordTreeLiteral::WalkInit()
00104 {
00105 if(WordCursorOne::WalkInit() != OK)
00106 return NOTOK;
00107 if(!bottom_document.Empty())
00108 return Seek(bottom_document);
00109 else
00110 return OK;
00111 }
00112
00113 int WordTreeLiteral::WalkRewind()
00114 {
00115 if(WordCursorOne::WalkRewind() != OK) return NOTOK;
00116 if(!bottom_document.Empty())
00117 return Seek(bottom_document);
00118 return OK;
00119 }
00120
00121 int WordTreeLiteral::WalkNext()
00122 {
00123 int ret;
00124 int too_low = 0;
00125
00126 do {
00127 ret = WordCursorOne::WalkNext();
00128
00129
00130
00131
00132 if(ret == OK &&
00133 !bottom_document.Empty() &&
00134 key_semantic.DocumentCompare(bottom_document, GetDocument()) > 0) {
00135 WordKey patch = bottom_document;
00136 key_semantic.RealmCopy(GetFound().Key(), patch);
00137 ret = Seek(patch);
00138 too_low = 1;
00139 } else {
00140 too_low = 0;
00141 }
00142 } while(ret == OK && too_low);
00143
00144 if(verbose > 3) fprintf(stderr, "WordTreeLiteral::WalkNext: reached %s\n", (char*)GetDocument().Get());
00145
00146 if(ret == OK && !top_document.Empty()) {
00147 if(key_semantic.DocumentCompare(top_document, GetDocument()) <= 0)
00148 ret = WORD_WALK_ATEND|WORD_WALK_ATEND_NOMATCH;
00149 }
00150
00151 return ret;
00152 }
00153
00154 int WordTreeLiteral::Seek(const WordKey& position)
00155 {
00156 return WordCursorOne::Seek((bottom_document.Empty() || key_semantic.DocumentCompare(bottom_document, position) <= 0) ? position : bottom_document);
00157 }
00158
00159
00160 int WordTreeLiteral::Get(String& bufferout) const
00161 {
00162
00163 if(scope.empty())
00164 bufferout << search;
00165 else
00166 bufferout << "( " << operator_name[IsA()] << " \"" << scope << "\" " << search << " )";
00167 return OK;
00168 }
00169
00170
00171
00172
00173 static char* ret2str(int ret)
00174 {
00175 if(ret == WORD_WALK_REDO)
00176 return "REDO";
00177
00178 if(ret == WORD_WALK_RESTART)
00179 return "RESTART";
00180
00181 if(ret == WORD_WALK_NEXT)
00182 return "NEXT";
00183
00184 if(ret == OK)
00185 return "OK";
00186
00187 if(ret == NOTOK)
00188 return "NOTOK";
00189
00190 if(ret == WORD_WALK_ATEND)
00191 return "ATEND";
00192
00193 if(ret == WORD_WALK_ATEND|WORD_WALK_ATEND_NOMATCH)
00194 return "ATEND|NOMATCH";
00195
00196 return "???";
00197 }
00198
00199 WordTreeOperand::~WordTreeOperand()
00200 {
00201 if(cursors) {
00202 unsigned int i;
00203 for(i = 0; i < cursors_length; i++)
00204 delete cursors[i];
00205 free(cursors);
00206 }
00207 }
00208
00209 int
00210 WordTreeOperand::Optimize()
00211 {
00212
00213
00214
00215 unsigned int i;
00216 for(i = 0; i < cursors_length; i++)
00217 if(cursors[i]->Optimize() == NOTOK)
00218 return NOTOK;
00219 return OK;
00220 }
00221
00222 int WordTreeOperand::OptimizeOr(int proximity)
00223 {
00224 unsigned int ignore = 0;
00225 unsigned int ignore_mask = 0;
00226 unsigned int i;
00227 for(i = 0; i < cursors_length; i++) {
00228 int reduce;
00229
00230
00231
00232 switch(cursors[i]->IsA()) {
00233 case WORD_TREE_MANDATORY:
00234 ignore |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i));
00235 reduce = 1;
00236 break;
00237 case WORD_TREE_NOT:
00238 ignore |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i));
00239 ignore_mask |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i));
00240 reduce = 1;
00241 break;
00242 default:
00243 reduce = 0;
00244 break;
00245 }
00246
00247
00248
00249 if(reduce) {
00250 WordTreeOperand* old = (WordTreeOperand*)cursors[i];
00251 cursors[i] = old->cursors[0];
00252 old->cursors[0] = 0;
00253 old->cursors_length--;
00254 if(old->cursors_length > 0) {
00255 fprintf(stderr, "WordTreeOperand::OptimizeOr: too many cursors\n");
00256 return NOTOK;
00257 }
00258 delete old;
00259 }
00260 }
00261 return permutation.Initialize(cursors_length, ignore, ignore_mask, proximity);
00262 }
00263
00264 int
00265 WordTreeOperand::WalkInit()
00266 {
00267 unsigned int i;
00268 int ret = WORD_WALK_ATEND;
00269 for(i = 0; i < cursors_length; i++)
00270 if((ret = cursors[i]->WalkInit()) != OK)
00271 return ret;
00272 return (status = ret);
00273 }
00274
00275 int
00276 WordTreeOperand::WalkRewind()
00277 {
00278 unsigned int i;
00279 int ret = OK;
00280 for(i = 0; i < cursors_length; i++)
00281 if((ret = cursors[i]->WalkRewind()) != OK)
00282 return ret;
00283 status = OK;
00284 key_semantic.DocumentClear(pos);
00285 cursor_get_flags = DB_SET_RANGE;
00286 found.Clear();
00287 return ret;
00288 }
00289
00290 int
00291 WordTreeOperand::WalkFinish()
00292 {
00293 unsigned int i;
00294 int ret = OK;
00295 for(i = 0; i < cursors_length; i++)
00296 if((ret = cursors[i]->WalkFinish()) != OK)
00297 return ret;
00298 return ret;
00299 }
00300
00301 int
00302 WordTreeOperand::Seek(const WordKey& patch)
00303 {
00304 pos = patch;
00305 cursor_get_flags = DB_SET_RANGE;
00306
00307 unsigned int i;
00308 int ret = OK;
00309 for(i = 0; i < cursors_length; i++)
00310 if((ret = cursors[i]->Seek(patch)) != OK &&
00311 !cursors[i]->IsAtEnd())
00312 return ret;
00313 status = OK;
00314 return OK;
00315 }
00316
00317 int WordTreeOperand::Get(String& bufferout) const
00318 {
00319 bufferout << "( " << operator_name[IsA()] << " \"" << scope << "\" ";
00320 unsigned int i;
00321 for(i = 0; i < cursors_length; i++)
00322 bufferout << cursors[i]->Get() << " ";
00323 bufferout << " )";
00324 return OK;
00325 }
00326
00327
00328
00329 int WordTreeOptional::Optimize()
00330 {
00331 int ret;
00332 if((ret = WordTreeOperand::Optimize()) != OK)
00333 return ret;
00334
00335 if(UseProximity() != WORD_PERMUTE_PROXIMITY_ONLY) {
00336 if((ret = AscendingFrequency()) != OK)
00337 return ret;
00338 }
00339
00340 unsigned int stripped;
00341 unsigned int killed_mandatory;
00342 if((ret = StripNonExistent(stripped, killed_mandatory)) != OK)
00343 return ret;
00344
00345 if(killed_mandatory || (AllOrNothing() && stripped)) {
00346
00347
00348
00349
00350 unsigned int i;
00351 for(i = 0; i < cursors_length; i++)
00352 delete cursors[i];
00353 cursors_length = 0;
00354
00355 return OK;
00356 } else {
00357 return OptimizeOr(UseProximity());
00358 }
00359 }
00360
00361 int WordTreeOptional::ContextSaveList(StringList& list) const
00362 {
00363 int ret;
00364 if((ret = WordTreeOperand::ContextSaveList(list)) != OK)
00365 return ret;
00366
00367 if(UsePermutation()) {
00368 String* buffer = new String();
00369 permutation.Get(*buffer);
00370
00371 list.Add(buffer);
00372 }
00373
00374 {
00375 String* buffer = new String();
00376 if((ret = WordCursorOne::ContextSave(*buffer)) != OK)
00377 return ret;
00378
00379 list.Add(buffer);
00380 }
00381
00382 return OK;
00383 }
00384
00385 int WordTreeOptional::ContextRestoreList(StringList& list)
00386 {
00387 int ret;
00388 if((ret = WordTreeOperand::ContextRestoreList(list)) != OK)
00389 return ret;
00390
00391 if(UsePermutation()) {
00392 char* buffer = list[0];
00393 if((ret = permutation.Set(buffer)) != OK)
00394 return ret;
00395 list.Remove(0);
00396 }
00397
00398 {
00399 char* buffer = list[0];
00400 if(!buffer) return NOTOK;
00401 WordKey key(words->GetContext(), buffer);
00402 if((ret = Seek(key)) != OK)
00403 return ret;
00404 cursor_get_flags = DB_NEXT;
00405
00406 list.Remove(0);
00407 }
00408
00409 return OK;
00410 }
00411
00412 int WordTreeOptional::WalkNext()
00413 {
00414 WordKey& constraint = pos;
00415
00416
00417
00418 if(constraint.Empty()) {
00419 if(verbose) fprintf(stderr, " WordTreeOptional::WalkNext: required position or document is reset, i.e. before first possible entry\n");
00420 key_semantic.DocumentClear(constraint);
00421 }
00422
00423 int ret = OK;
00424
00425
00426
00427 if(cursor_get_flags == DB_NEXT)
00428 key_semantic.DocumentNext(constraint, has_uniq);
00429
00430 if((ret = Seek(constraint)) != OK)
00431 return ret;
00432
00433 int near = permutation.Proximity();
00434 WordTree* first = 0;
00435 for(unsigned int i = 0; i < cursors_length;) {
00436 int excluded = permutation.Excluded(i);
00437 WordTree& cursor = *(cursors[i]);
00438
00439 if(excluded && !permutation.Ignored(i)) {
00440 if(verbose) fprintf(stderr, "\n WordTreeOptional::WalkNext: looking for %s (ignored)\n", (char*)cursor.search);
00441 i++;
00442 continue;
00443 }
00444
00445 near = permutation.Proximity();
00446 if(verbose) fprintf(stderr, "\n WordTreeOptional::WalkNext: looking for %s (excluded = %s, proximity = %s)\n", (char*)cursor.search, (excluded ? "yes" : "no"), (near ? "yes" : "no" ));
00447
00448 int ret;
00449 if(excluded) {
00450 ret = SearchCursorNot(cursor, constraint);
00451 if(verbose) fprintf(stderr, " WordTreeOptional::WalkNext: Not -> %s\n", ret2str(ret));
00452 } else {
00453 if(near) {
00454 ret = SearchCursorNear(cursor, first, constraint, proximity);
00455 if(verbose) fprintf(stderr, " WordTreeOptional::WalkNext: Near -> %s\n", ret2str(ret));
00456 } else {
00457 ret = SearchCursorAnd(cursor, constraint, permutation);
00458 if(verbose) fprintf(stderr, " WordTreeOptional::WalkNext: And -> %s\n", ret2str(ret));
00459 }
00460 }
00461
00462 switch(ret & WORD_WALK_RESULT_MASK) {
00463 case WORD_WALK_ATEND:
00464
00465
00466
00467
00468 if(key_semantic.HasRealm()) {
00469 if(ret & WORD_WALK_ATEND_NOMATCH) {
00470
00471
00472
00473 if(cursor.GetFound().Key().Get(WORD_KEY_WORD) == cursor.GetSearch().Get(WORD_KEY_WORD)) {
00474
00475
00476
00477 if(SetRealm(cursor.GetFound().Key()) != OK)
00478 return NOTOK;
00479
00480
00481
00482 if(WalkRewind() != OK)
00483 return NOTOK;
00484 first = 0;
00485 i = 0;
00486 break;
00487 }
00488 }
00489 }
00490 if(UsePermutation()) {
00491
00492
00493
00494 if(verbose) fprintf(stderr, " WordTreeOptional::WalkNext: try next proximity/exclusion permutation\n");
00495 switch(permutation.Next()) {
00496
00497
00498
00499 case WORD_PERMUTE_END:
00500 if(verbose) fprintf(stderr, "\nWordTreeOptional::WalkNext: ATEND\n");
00501 return (status = WORD_WALK_ATEND);
00502 break;
00503
00504
00505
00506
00507 case WORD_PERMUTE_OK:
00508 if(key_semantic.HasRealm() && ClearRealm() != OK)
00509 return NOTOK;
00510 if(WalkRewind() != OK)
00511 return NOTOK;
00512 break;
00513 }
00514 first = 0;
00515 i = 0;
00516 } else {
00517 if(verbose) fprintf(stderr, "\nWordTreeOptional::WalkNext: ATEND\n");
00518 return (status = WORD_WALK_ATEND);
00519 }
00520 break;
00521 case WORD_WALK_REDO:
00522 break;
00523 case WORD_WALK_RESTART:
00524 first = 0;
00525 i = 0;
00526 break;
00527 case WORD_WALK_NEXT:
00528 i++;
00529 break;
00530 case NOTOK:
00531 default:
00532 if(verbose) fprintf(stderr, "\nWordTreeOptional::WalkNext: %s\n", ret2str(ret));
00533 return ret;
00534 break;
00535 }
00536 }
00537
00538 cursor_get_flags = DB_NEXT;
00539
00540 SetInfo();
00541
00542
00543
00544
00545 for(unsigned int i = 0; i < cursors_length; i++) {
00546 WordTree& cursor = *(cursors[i]);
00547 if(!permutation.Excluded(i)) {
00548 found.Key() = cursor.GetFound().Key();
00549 break;
00550 }
00551 }
00552
00553 if(verbose) fprintf(stderr, "\nWordTreeOptional::WalkNext: OK\n");
00554 return ret;
00555 }
00556
00557 int WordTreeOptional::Seek(const WordKey& position)
00558 {
00559 pos = position;
00560 cursor_get_flags = DB_SET_RANGE;
00561 status = OK;
00562
00563 unsigned int i;
00564 for(i = 0; i < cursors_length; i++) {
00565 if(!permutation.Excluded(i)) {
00566 WordTree& cursor = *(cursors[i]);
00567 return cursor.Seek(position);
00568 }
00569 }
00570
00571 fprintf(stderr, " WordTreeOptional::Seek: failed\n");
00572 return NOTOK;
00573 }
00574
00575
00576 void WordTreeOptional::SetInfo()
00577 {
00578 unsigned int i;
00579 for(i = 0; i < cursors_length; i++)
00580 cursors[i]->SetInfo();
00581
00582 info.trunc();
00583
00584 for(i = 0; i < cursors_length; i++) {
00585 WordTree& cursor = *(cursors[i]);
00586
00587 if(!permutation.Excluded(i))
00588 info << cursor.info << " ";
00589 }
00590
00591 info << (permutation.Proximity() ? "proximity" : "");
00592 }
00593
00594 int WordTreeOptional::Count(unsigned int& count) const
00595 {
00596 unsigned int count_and = 0;
00597
00598 count = 0;
00599 for(unsigned int i = 0; i < cursors_length; i++) {
00600 unsigned int cursor_count;
00601 cursors[i]->Count(cursor_count);
00602 if(permutation.Ignored(i) && !permutation.Excluded(i)) {
00603 count_and = count_and ? HtMIN(count_and, cursor_count) : cursor_count;
00604 } else {
00605 count += cursor_count;
00606 }
00607 }
00608
00609 count = count_and ? count_and : count;
00610
00611 return OK;
00612 }
00613
00614 int WordTreeOptional::SearchCursorNear(WordTree& cursor, WordTree*& master, WordKey& constraint, int proximity)
00615 {
00616 int is_master = master == 0 || master == &cursor;
00617 if(master == 0) master = &cursor;
00618 const WordKey& masterKey = master->GetFound().Key();
00619
00620 int direction = key_semantic.LocationCompare(constraint, cursor.GetFound().Key(), proximity);
00621 if(verbose) {
00622 fprintf(stderr, " WordTreeOptional::SearchCursorNear: required position is %s\n", (char*)(constraint.Get()));
00623 fprintf(stderr, " WordTreeOptional::SearchCursorNear: cursor %s is %s\n",
00624 (cursor.IsAtEnd() ? "" : (char*)(cursor.GetFound().Key().Get())),
00625 (cursor.IsAtEnd() ? "at end" : ((direction == 0 ? "ok" : (direction < 0 ? "after required position" : "before required position")))));
00626 }
00627
00628
00629
00630
00631
00632 if(direction == 0) {
00633
00634
00635
00636
00637 if(is_master)
00638 key_semantic.LocationSet(cursor.GetFound().Key(), constraint);
00639
00640
00641
00642 key_semantic.LocationNearLowest(constraint, proximity);
00643 return WORD_WALK_NEXT;
00644
00645
00646
00647
00648 } else if(direction > 0) {
00649
00650
00651
00652 cursor.Seek(constraint);
00653 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorNear: seek to required position\n");
00654 int ret;
00655 if((ret = cursor.WalkNext()) == OK) {
00656
00657
00658
00659
00660
00661 if(is_master) {
00662 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorNear: required position is copied from master cursor\n");
00663 key_semantic.Location2Document(constraint);
00664 }
00665
00666
00667
00668 return WORD_WALK_REDO;
00669 } else {
00670 return ret;
00671 }
00672
00673
00674
00675
00676
00677
00678 } else if(direction < 0) {
00679
00680
00681
00682
00683 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorNear: required position is copied from cursor\n");
00684 key_semantic.DocumentSet(cursor.GetDocument(), constraint);
00685
00686
00687
00688
00689
00690
00691
00692 if(!is_master && key_semantic.DocumentCompare(masterKey, constraint) == 0) {
00693 key_semantic.LocationSet(masterKey, constraint);
00694 key_semantic.LocationNext(constraint);
00695 }
00696
00697
00698
00699 return WORD_WALK_RESTART;
00700 } else {
00701 fprintf(stderr, " WordTreeOptional::WordCursorNear: reached unreachable statement\n");
00702 return NOTOK;
00703 }
00704 return NOTOK;
00705 }
00706
00707 int WordTreeOptional::SearchCursorNot(WordTree& cursor, WordKey& document)
00708 {
00709 if(key_semantic.HasRealm()) {
00710
00711
00712
00713
00714
00715 WordKey search = cursor.GetSearch();
00716 key_semantic.DocumentCopy(document, cursor.GetSearch());
00717 cursor.UndefinedRealm();
00718 cursor.WalkRewind();
00719 cursor.Seek(document);
00720 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorNot: seek to required document\n");
00721 if(cursor.WalkNext() != OK && !cursor.IsAtEnd())
00722 return NOTOK;
00723 int ret;
00724 if(cursor.IsAtEnd()) {
00725 ret = WORD_WALK_NEXT;
00726 } else {
00727 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorNot: required document incremented\n");
00728
00729
00730
00731
00732 key_semantic.DocumentNext(document, has_uniq);
00733
00734
00735
00736 ret = WORD_WALK_RESTART;
00737 }
00738 cursor.GetSearch() = search;
00739 cursor.WalkRewind();
00740 return ret;
00741 } else {
00742 int direction = key_semantic.DocumentCompare(document, cursor.GetFound().Key());
00743 if(verbose) {
00744 fprintf(stderr, " WordTreeOptional::SearchCursorNot: required document is %s\n", (char*)(document.Get()));
00745 fprintf(stderr, " WordTreeOptional::SearchCursorNot: cursor %s is %s\n",
00746 (cursor.IsAtEnd() ? "" : (char*)(cursor.GetFound().Key().Get())),
00747 (cursor.IsAtEnd() ? "at end" : ((direction == 0 ? "ok" : (direction < 0 ? "after required document" : "before required document")))));
00748 }
00749
00750
00751
00752
00753
00754
00755
00756
00757 if(direction < 0 || cursor.IsAtEnd()) {
00758 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorNot: cursor does not match in required document\n");
00759 return WORD_WALK_NEXT;
00760
00761
00762
00763
00764 } else if(direction > 0) {
00765
00766
00767
00768 cursor.Seek(document);
00769 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorNot: seek to required document\n");
00770 if(cursor.WalkNext() != OK && !cursor.IsAtEnd())
00771 return NOTOK;
00772
00773
00774
00775
00776
00777 return WORD_WALK_REDO;
00778
00779
00780
00781
00782
00783
00784
00785 } else if(direction == 0) {
00786 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorNot: required document incremented\n");
00787
00788
00789
00790
00791 key_semantic.DocumentNext(document, has_uniq);
00792
00793
00794
00795 return WORD_WALK_RESTART;
00796 } else {
00797 fprintf(stderr, " WordTreeOptional::WordCursorNot: reached unreachable statement\n");
00798 return NOTOK;
00799 }
00800 }
00801 return NOTOK;
00802 }
00803
00804 int WordTreeOptional::SearchCursorAnd(WordTree& cursor, WordKey& document, WordExclude& permutation)
00805 {
00806 int direction = key_semantic.DocumentCompare(document, cursor.GetFound().Key());
00807 if(verbose) {
00808 fprintf(stderr, " WordTreeOptional::SearchCursorAnd: required document is %s\n", (char*)(document.Get()));
00809 fprintf(stderr, " WordTreeOptional::SearchCursorAnd: cursor %s is %s\n",
00810 (cursor.IsAtEnd() ? "" : (char*)(cursor.GetFound().Key().Get())),
00811 (cursor.IsAtEnd() ? "at end" : ((direction == 0 ? "ok" : (direction < 0 ? "after required document" : "before required document")))));
00812 }
00813
00814
00815
00816
00817
00818
00819
00820 if(direction == 0) {
00821 return WORD_WALK_NEXT;
00822
00823
00824
00825
00826 } else if(direction > 0) {
00827
00828
00829
00830 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorAnd: seek to required document\n");
00831 cursor.Seek(document);
00832 int ret;
00833 if((ret = cursor.WalkNext()) == OK)
00834 return WORD_WALK_REDO;
00835 else
00836 return ret;
00837
00838
00839
00840
00841
00842
00843
00844
00845 } else if(direction < 0) {
00846
00847
00848
00849 if(verbose) fprintf(stderr, " WordTreeOptional::SearchCursorAnd: required document is copied from cursor\n");
00850 key_semantic.DocumentSet(cursor.GetDocument(), document);
00851
00852
00853
00854
00855 return WORD_WALK_RESTART;
00856 } else {
00857 fprintf(stderr, " WordTreeOptional::WordCursorAnd: reached unreachable statement\n");
00858 return NOTOK;
00859 }
00860 return NOTOK;
00861 }
00862
00863
00864
00865
00866 class WordSort {
00867 public:
00868 unsigned int frequency;
00869 WordTree *cursor;
00870 };
00871
00872
00873
00874
00875 static int ascending_frequency(const void *a, const void *b)
00876 {
00877 const WordSort& a_cursor = *(WordSort*)a;
00878 const WordSort& b_cursor = *(WordSort*)b;
00879
00880 return a_cursor.frequency - b_cursor.frequency;
00881 }
00882
00883 int WordTreeOptional::AscendingFrequency()
00884 {
00885
00886
00887
00888 WordSort *tmp = new WordSort[cursors_length];
00889
00890 memset((char*)tmp, '\0', sizeof(WordSort[cursors_length]));
00891
00892 unsigned int i;
00893 for(i = 0; i < cursors_length; i++) {
00894 unsigned int frequency;
00895 if(cursors[i]->Noccurrence(frequency) != OK) {
00896 delete [] tmp;
00897 return NOTOK;
00898 }
00899 if(verbose > 2) fprintf(stderr, " WordTreeOptional::AscendingFrequency: %s occurs %d times\n", (char*)cursors[i]->search, frequency);
00900 tmp[i].frequency = frequency;
00901 tmp[i].cursor = cursors[i];
00902 }
00903
00904 memset((char*)cursors, '\0', sizeof(WordTree*) * cursors_length);
00905
00906 qsort((void *)tmp, cursors_length, sizeof(WordSort), &ascending_frequency);
00907
00908 for(i = 0; i < cursors_length; i++)
00909 cursors[i] = tmp[i].cursor;
00910
00911 delete [] tmp;
00912 return OK;
00913 }
00914
00915 int WordTreeOptional::StripNonExistent(unsigned int& stripped, unsigned int& killed_mandatory)
00916 {
00917 stripped = 0;
00918 killed_mandatory = 0;
00919
00920 WordTree** tmp = new WordTree*[cursors_length];
00921 memset((char*)tmp, '\0', sizeof(WordTree*[cursors_length]));
00922
00923 unsigned int from;
00924 unsigned int to;
00925
00926 for(to = from = 0; from < cursors_length; from++) {
00927 unsigned int frequency;
00928 if(cursors[from]->Noccurrence(frequency) != OK) {
00929 delete [] tmp;
00930 return NOTOK;
00931 }
00932
00933 if(verbose > 2) fprintf(stderr, " WordTreeOptional::StripNonExistent: %s occurs %d times\n", (char*)cursors[from]->search, frequency);
00934 if(frequency > 0) {
00935 tmp[to++] = cursors[from];
00936 } else {
00937 if(cursors[from]->IsA() == WORD_TREE_MANDATORY)
00938 killed_mandatory++;
00939 delete cursors[from];
00940 stripped++;
00941 }
00942 }
00943
00944 memset((char*)cursors, '\0', sizeof(WordTree*) * cursors_length);
00945
00946 cursors_length = to;
00947 unsigned int i;
00948 for(i = 0; i < cursors_length; i++)
00949 cursors[i] = tmp[i];
00950
00951 delete [] tmp;
00952
00953 return OK;
00954 }
00955
00956
00957
00958 int WordTreeOr::Optimize()
00959 {
00960 int ret;
00961 if((ret = WordTreeOperand::Optimize()) != OK)
00962 return ret;
00963
00964 if((ret = AscendingFrequency()) != OK)
00965 return ret;
00966
00967 unsigned int stripped;
00968 unsigned int killed_mandatory;
00969 if((ret = StripNonExistent(stripped, killed_mandatory)) != OK)
00970 return ret;
00971
00972 return OptimizeOr(WORD_PERMUTE_PROXIMITY_NO);
00973 }
00974
00975 int WordTreeOr::ContextSaveList(StringList& list) const
00976 {
00977 int ret;
00978 if((ret = WordTreeOperand::ContextSaveList(list)) != OK)
00979 return ret;
00980
00981 {
00982 String* buffer = new String();
00983 permutation.Get(*buffer);
00984
00985 list.Add(buffer);
00986 }
00987
00988 {
00989 String* buffer = new String();
00990 if((ret = WordCursorOne::ContextSave(*buffer)) != OK)
00991 return ret;
00992
00993 list.Add(buffer);
00994 }
00995
00996 return OK;
00997 }
00998
00999 int WordTreeOr::ContextRestoreList(StringList& list)
01000 {
01001 int ret;
01002 if((ret = WordTreeOperand::ContextRestoreList(list)) != OK)
01003 return ret;
01004
01005 {
01006 char* buffer = list[0];
01007 if((ret = permutation.Set(buffer)) != OK)
01008 return ret;
01009 list.Remove(0);
01010 }
01011
01012 {
01013 char* buffer = list[0];
01014 if(!buffer) return NOTOK;
01015 WordKey key(words->GetContext(), buffer);
01016 if((ret = Seek(key)) != OK)
01017 return ret;
01018 cursor_get_flags = DB_NEXT;
01019
01020 list.Remove(0);
01021 }
01022
01023 return OK;
01024 }
01025
01026 void WordTreeOr::SetInfo()
01027 {
01028 unsigned int i;
01029 for(i = 0; i < cursors_length; i++)
01030 cursors[i]->SetInfo();
01031
01032 info.trunc();
01033
01034 for(i = 0; i < cursors_length; i++) {
01035 WordTree& cursor = *(cursors[i]);
01036
01037 if(!permutation.Excluded(i) &&
01038 !cursor.IsAtEnd() &&
01039 key_semantic.DocumentCompare(cursor.GetFound().Key(), GetFound().Key()) == 0) {
01040 info << cursor.info << " ";
01041 }
01042 }
01043 }
01044
01045 int WordTreeOr::WalkNext()
01046 {
01047 WordKey& constraint = pos;
01048
01049
01050
01051 if(constraint.Empty())
01052 key_semantic.DocumentClear(constraint);
01053
01054 WordKey candidate(words->GetContext());
01055 int match_ok;
01056 do {
01057 int ret;
01058 unsigned int i;
01059 candidate.Clear();
01060
01061
01062
01063 if(cursor_get_flags == DB_NEXT)
01064 key_semantic.DocumentNext(constraint, has_uniq);
01065
01066 if((ret = Seek(constraint)) != OK)
01067 return ret;
01068
01069 match_ok = 1;
01070
01071
01072
01073
01074
01075 for(i = 0; i < cursors_length; i++) {
01076 if(permutation.Excluded(i))
01077 continue;
01078 WordTree& cursor = *(cursors[i]);
01079
01080 switch((ret = cursor.WalkNext()) & WORD_WALK_RESULT_MASK) {
01081 case WORD_WALK_ATEND:
01082
01083
01084
01085 break;
01086 case OK:
01087
01088
01089
01090
01091
01092 if(candidate.Empty() ||
01093 key_semantic.DocumentCompare(candidate, cursor.GetFound().Key()) > 0) {
01094 key_semantic.DocumentSet(cursor.GetDocument(), candidate);
01095 }
01096 break;
01097 default:
01098 return ret;
01099 break;
01100 }
01101 }
01102
01103
01104
01105
01106 if(candidate.Empty())
01107 return WORD_WALK_ATEND;
01108
01109 found.Key() = candidate;
01110
01111 SetInfo();
01112
01113 if(permutation.ExcludedCount() > 0) {
01114 if((ret = Seek(candidate)) != OK)
01115 return ret;
01116
01117
01118
01119
01120 for(i = 0; i < cursors_length && match_ok; i++) {
01121 if(!permutation.Excluded(i))
01122 continue;
01123 WordTree& cursor = *(cursors[i]);
01124
01125 switch((ret = cursor.WalkNext()) & WORD_WALK_RESULT_MASK) {
01126 case WORD_WALK_ATEND:
01127
01128
01129
01130 break;
01131 case OK:
01132
01133
01134
01135
01136
01137 if(key_semantic.DocumentCompare(candidate, cursor.GetFound().Key()) == 0) {
01138 constraint = candidate;
01139 match_ok = 0;
01140 }
01141 break;
01142 default:
01143 return ret;
01144 break;
01145 }
01146
01147 }
01148 }
01149
01150 cursor_get_flags = DB_NEXT;
01151
01152 } while(!match_ok);
01153
01154 constraint = candidate;
01155
01156 return OK;
01157 }
01158
01159
01160
01161
01162
01163
01164
01165
01166