00001 // 00002 // Part of the ht://Dig package <http://www.htdig.org/> 00003 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group 00004 // For copyright details, see the file COPYING in your distribution 00005 // or the GNU General Public License version 2 or later 00006 // <http://www.gnu.org/copyleft/gpl.html> 00007 // 00008 // $Id: WordSearch_8cc-source.html,v 1.1 2008/06/08 10:13:23 sebdiaz Exp $ 00009 // 00010 #ifdef HAVE_CONFIG_H 00011 #include <config.h> 00012 #endif /* HAVE_CONFIG_H */ 00013 00014 #ifdef HAVE_UNISTD_H 00015 #include <unistd.h> 00016 #endif /* HAVE_UNISTD_H */ 00017 00018 #include <WordSearch.h> 00019 #include <WordResults.h> 00020 00021 WordSearch::WordSearch(WordList* nwords) 00022 { 00023 // 00024 // Internal 00025 // 00026 words = nwords; 00027 verbose = 0; 00028 00029 // 00030 // Input/Output 00031 // 00032 limit_base = 0; 00033 00034 // 00035 // Input 00036 // 00037 limit_count = 0; 00038 expr = 0; 00039 00040 // 00041 // Output 00042 // 00043 matches = 0; 00044 } 00045 00046 int WordSearch::ContextRestore() 00047 { 00048 String context_in; 00049 if(results->GetContext(context_in) != OK) 00050 return NOTOK; 00051 00052 return expr->ContextRestore(context_in); 00053 } 00054 00055 int WordSearch::ContextSave(int status) 00056 { 00057 String tmp; 00058 00059 if(status != WORD_WALK_ATEND) { 00060 if(expr->ContextSave(tmp) != OK) 00061 return NOTOK; 00062 } 00063 00064 results->PutContext(tmp); 00065 00066 return OK; 00067 } 00068 00069 WordMatches *WordSearch::Search() 00070 { 00071 // 00072 // Build space for results 00073 // 00074 matches = new WordMatches(words->GetContext()); 00075 matches->Allocate(limit_count + 1); 00076 00077 int ret; 00078 // 00079 // Call SearchFromIndex if returned value is neither OK (got them all) 00080 // or WORD_WALK_ATEND (did not get them all but at end of search anyway). 00081 // 00082 if((ret = SearchFromCache()) == WORD_WALK_END_CACHE) { 00083 unsigned int count; 00084 if(results->Count(count) == NOTOK) 00085 return 0; 00086 ret = SearchFromIndex(limit_base - count + limit_count); 00087 } 00088 00089 // 00090 // Discard results if nothing was found or error occured 00091 // 00092 if(ret == NOTOK || matches->length <= 0) { 00093 delete matches; 00094 matches = 0; 00095 } 00096 00097 return matches; 00098 } 00099 00100 int WordSearch::SearchFromCache() 00101 { 00102 int filled = results->Filled(); 00103 unsigned int available; 00104 unsigned int base = limit_base; 00105 00106 if(results->Count(available) != OK) 00107 return NOTOK; 00108 00109 if(available <= limit_base) { 00110 base = (available / limit_count) * limit_count; 00111 // 00112 // If the cache is not filled, it is the responsibility of 00113 // SearchFromIndex to set the limit_base according to what is 00114 // found when searching. 00115 // 00116 if(filled) { 00117 limit_base = base; 00118 } 00119 } 00120 00121 if(results->GetMatchesTotal(matches_total) != OK) 00122 return NOTOK; 00123 00124 return results->Get(matches, limit_count, base); 00125 } 00126 00127 int WordSearch::SearchFromIndex(unsigned int length) 00128 { 00129 int ret = 0; 00130 00131 if(WordTree::TopLevelOptimize(expr) != OK) 00132 return NOTOK; 00133 00134 /* 00135 * This happens when the optimization decided that the 00136 * expression was meaningless. 00137 */ 00138 if(this==0 || expr == 0) 00139 return NOTOK; 00140 00141 if(expr->Count(matches_total) != OK) 00142 return NOTOK; 00143 00144 // 00145 // Move before first possible position. 00146 // 00147 if((ret = expr->WalkInit()) != OK) 00148 goto end; 00149 00150 if((ret = ContextRestore()) == NOTOK) 00151 goto end; 00152 00153 // 00154 // Set the result list only after the context was restored otherwise 00155 // it will interfere with the re-initialization of the context because 00156 // WalkNext will skip previously seen documents. 00157 // 00158 if((ret = expr->SetResults(results)) != OK) 00159 goto end; 00160 00161 ret = SearchLoop(expr, length); 00162 00163 // 00164 // Don't bother saving the context if at end of 00165 // search (WORD_WALK_ATEND) or error (NOTOK) 00166 // 00167 if(ret != NOTOK && (ret = ContextSave(ret)) == NOTOK) 00168 goto end; 00169 00170 end: 00171 expr->WalkFinish(); 00172 00173 if(results) { 00174 if(results->PutMatchesTotal(matches_total) != OK) 00175 return NOTOK; 00176 } 00177 00178 return ret; 00179 } 00180 00181 int WordSearch::SearchLoop(WordTree *expr, unsigned int length) 00182 { 00183 int ret = OK; 00184 unsigned int i; 00185 WordResults* results = expr->GetResults(); 00186 unsigned int count; 00187 00188 if(results->Count(count) != OK) 00189 return NOTOK; 00190 00191 for(i = 0; i < length; i++) { 00192 if((ret = expr->WalkNext()) != OK) { 00193 if(ret != WORD_WALK_ATEND) 00194 return ret; 00195 break; 00196 } else { 00197 WordMatch* match = matches->matches[matches->length]; 00198 match->match = expr->GetDocument(); 00199 if(expr->IsA() != WORD_TREE_LITERAL) 00200 match->info = ((WordTreeOperand*)expr)->GetInfo(); 00201 if((ret = results->Put(*match, count + i)) != OK) 00202 return ret; 00203 if(verbose) fprintf(stderr, "WordSearch::SearchLoop: match %s\n", (char*)match->Get()); 00204 matches->length = (matches->length + 1) % limit_count; 00205 } 00206 } 00207 00208 if(i == 0) { 00209 ; 00210 } else { 00211 if(matches->length == 0) matches->length = limit_count; 00212 limit_base = ((count + i - 1) / limit_count) * limit_count; 00213 } 00214 00215 // 00216 // Invalidate matches that are above the list of valid matches 00217 // 00218 for(i = 0; i < matches->size; i++) 00219 matches->matches[i]->valid = i < matches->length; 00220 00221 return ret; 00222 }