WordSearch.h

Go to the documentation of this file.
00001 //
00002 // Part of the ht://Dig package   <http://www.htdig.org/>
00003 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
00004 // For copyright details, see the file COPYING in your distribution
00005 // or the GNU General Public License version 2 or later
00006 // <http://www.gnu.org/copyleft/gpl.html>
00007 //
00008 // $Id: WordSearch_8h-source.html,v 1.1 2008/06/08 10:13:23 sebdiaz Exp $
00009 //
00010 // ************************* WordSearch implementation ********************
00011 //
00012 // NAME
00013 //
00014 // Solve a query from a WordTree syntax tree
00015 //
00016 // SYNOPSIS
00017 //
00018 // #include <WordSearch.h>
00019 //
00020 // WordSearch search(words);              // words is a WordList*
00021 // search.expr = get_query();             // WordTree*, deleted by ~WordSearch
00022 // search.limit_count = NUMBER_OF_RESULTS;
00023 // WordMatches* matches = search.Search();
00024 // ...
00025 //  
00026 // DESCRIPTION
00027 //
00028 // The WordSearch class is a wrapper to query an inverted index
00029 // using a WordTree syntax tree. 
00030 // 
00031 // END
00032 //
00033 
00034 #ifndef _WordSearch_h
00035 #define _WordSearch_h
00036 
00037 #include <WordList.h>
00038 #include <WordMatch.h>
00039 #include <WordTree.h>
00040 
00041 class WordSearch {
00042 public:
00043   WordSearch(WordList* words);
00044 
00045   ~WordSearch() {
00046     if(expr) delete expr;
00047   }
00048 
00049   //-
00050   // Set the list of documents that must be ignored.
00051   //
00052   inline int SetResults(WordResults* nresults) { results = nresults; return OK; }
00053   //-
00054   // Get the list of documents that must be ignored.
00055   //
00056   inline WordResults* GetResults() { return results; }
00057 
00058   //-
00059   // Save the context of the last document retrieved in the context_out
00060   // data member.
00061   //
00062   int ContextSave(int status);
00063   //-
00064   // Restore a search context from the context_in data member.
00065   //
00066   int ContextRestore();
00067 
00068   //-
00069   // First call SearchFromCache and then SearchFromIndex if SearchFromCache
00070   // returned 0.
00071   //
00072   WordMatches *Search();
00073   //-
00074   // Attempt to retrieve the results from the cache. Returns 0 if
00075   // the search cannot be resolved from the cache. Returns the list
00076   // of matches if the search can be resolved from the cache. If the
00077   // cache only contains part of the desired results, call SearchFromIndex
00078   // to get the others. If the cache does not contain any of the desired
00079   // results, return 0.
00080   //
00081   int SearchFromCache();
00082   //-
00083   // Perform a search from the <b>expr</b> specifications.
00084   // Restore the context from <i>context_in</i> on <b>expr</b>.
00085   // Then skip (using WalkNext) <i>limit_bottom</i> entries.
00086   // Then collect in a WordMatch array of size <i>limit_count</i>
00087   // each match returned by WalkNext. When finished store
00088   // the context (ContextSave) in <i>context_out</i>.
00089   // It is the responsibility of the caller to free the WordMatch
00090   // array. If no match are found a null pointer is returned.
00091   //
00092   int SearchFromIndex(unsigned int length);
00093 
00094   //-
00095   // Search backend, only run the WalkNext loop but does not
00096   // allocate/deallocate data. If limit_bottom is above all matches
00097   // return the last valid limit_count range and reset limit_bottom 
00098   // accordingly.
00099   //
00100   int SearchLoop(WordTree *expr, unsigned int length);
00101 
00102   inline int Verbose(int verbosity) { return verbose = verbosity; }
00103 
00104   //
00105   // Internal
00106   //
00107   WordList* words;
00108   //-
00109   // A list of documents that must be ignored
00110   //
00111   WordResults *results;
00112   int verbose;
00113 
00114   //
00115   // Input/Output
00116   //
00117   //
00118   // Input: Absolute position of the document pointed by context_in. The
00119   //        limit_bottom position is relative to limit_base.
00120   // Output: Absolute position of the first document returned.
00121   //
00122   unsigned int limit_base;
00123 
00124   //
00125   // Input
00126   //
00127   //
00128   // Maximum number of matches returned
00129   //
00130   unsigned int limit_count;
00131   //
00132   // Query tree
00133   //
00134   WordTree* expr;
00135   
00136   //
00137   // Output
00138   //
00139   //
00140   // Array of at most limit_count matches. The number of valid elements in
00141   // the array is matches_length;
00142   //
00143   WordMatches* matches;
00144   //
00145   // Estimated number of matches.
00146   //
00147   unsigned int matches_total;
00148 };
00149 
00150 #endif /* _WordSearch_h */

Generated on Sun Jun 8 10:56:40 2008 for GNUmifluz by  doxygen 1.5.5