WordKeySemantic.h

Go to the documentation of this file.
00001 //
00002 // Part of the ht://Dig package   <http://www.htdig.org/>
00003 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
00004 // For copyright details, see the file COPYING in your distribution
00005 // or the GNU General Public License version 2 or later
00006 // <http://www.gnu.org/copyleft/gpl.html>
00007 //
00008 // $Id: WordKeySemantic_8h-source.html,v 1.1 2008/06/08 10:13:12 sebdiaz Exp $
00009 //
00010 // ************************* Document definition implementation ***********
00011 
00012 #ifdef HAVE_CONFIG_H
00013 #include <config.h>
00014 #endif /* HAVE_CONFIG_H */
00015 
00016 #ifdef HAVE_UNISTD_H
00017 #include <unistd.h>
00018 #endif /* HAVE_UNISTD_H */
00019 
00020 #ifndef _WordKeySemantic_h
00021 #define _WordKeySemantic_h
00022 
00023 #include <WordContext.h>
00024 
00025 // *********************** WordKeySemantic implementation ********************
00026 //
00027 // NAME
00028 //
00029 // encapsulate WordKey semantic for document and location
00030 //
00031 // SYNOPSIS
00032 //
00033 // #include <WordKeySemantic.h>
00034 //
00035 // #define SERVER 1
00036 // #define URL 2
00037 // #define LOCATION 3
00038 //
00039 // static int document[] = {
00040 //   SERVER,
00041 //   URL
00042 // };
00043 // 
00044 // WordKeySemantic semantic(context);
00045 // semantic.Initialize(document, sizeof(document)/sizeof(int), LOCATION, SERVER);
00046 //
00047 // DESCRIPTION
00048 //
00049 // Encapsulate the semantics of the fields of a WordKey object. It defines
00050 // what a document and a location are. It implements the set of
00051 // operations that a search needs to perform, given that its purpose
00052 // is to retrieve a document and that it wants to implement proximity
00053 // search based on a word location.
00054 //
00055 //
00056 // END
00057 //
00058 // A document is a set of fields in a given order. 
00059 // A location is a field.
00060 // The actual fields used to implement WordKeySemantic methods are
00061 // set with the Initialize method.
00062 //
00063 class WordKeySemantic {
00064 public:
00065   WordKeySemantic(WordContext *ncontext);
00066   ~WordKeySemantic();
00067 
00068   //-
00069   // Set the actual field numbers that define what a document is and
00070   // what a location is. The <b>document_arg</b> is a list of WordKey field
00071   // positions of length <b>document_length_arg</b> that must be adjacent.
00072   // The <b>location_arg</b> is the WordKey field position of the word
00073   // location within a document. <b>uniq_arg</b>: presumably the uniq field position (see Uniq; TODO confirm).
00074   // Return OK on success, NOTOK on failure.
00075   //
00076   int Initialize(int* document_arg, int document_length_arg, int location_arg, int uniq_arg);
00077 
00078   //
00079   // A realm exists if there is data between the word and the document.
00080   //
00081   //-
00082   // Return 1 if a realm exists, that is if the first document field is
00083   // not field 1, and 0 otherwise. For instance a document that is defined
00084   // as 2 3 has a realm and a document that is defined as 1 2 3 does not.
00085   // If the document is not defined, print a message on stderr and return 0.
00086   //
00087   int HasRealm() {
00088     if(!document) {
00089       fprintf(stderr, "WordKeySemantic::HasRealm: document is not defined\n");
00090       return 0;
00091     }
00092     return document[0] != 1; // a first document field other than 1 means a realm
00093   }
00094   //-
00095   // Clear <b>to</b> and copy the realm in <b>from</b> into <b>to.</b>
00096   //
00097   void RealmSet(const WordKey& from, WordKey& to);
00098   //-
00099   // Undefine the realm fields of <b>key</b>.
00100   //
00101   void RealmUndefined(WordKey& key);
00102   //-
00103   // Copy the realm in <b>from</b> into <b>to.</b>
00104   //
00105   void RealmCopy(const WordKey& from, WordKey& to);
00106   //-
00107   // Clear key and set all realm fields to 0.
00108   //
00109   void RealmClear(WordKey& key);
00110 
00111   //-
00112   // Return the index of the uniq field. It must be in the range of
00113   // indexes defined for the document.
00114   //
00115   inline int Uniq() { return uniq; }
00116 
00117   //-
00118   // Clear <b>to</b> and copy the document in <b>from</b> into <b>to.</b>
00119   //
00120   void DocumentSet(const WordKey& from, WordKey& to);
00121   //-
00122   // Copy the document in <b>from</b> into <b>to.</b>
00123   //
00124   void DocumentCopy(const WordKey& from, WordKey& to);
00125   //-
00126   // Increment the document in <b>key</b> using the <i>SetToFollowing</i>
00127   // method of WordKey. NOTE(review): presumably <b>use_uniq</b> selects the uniq
00128   // field as the position at which the increment starts -- confirm.
00129   //
00130   void DocumentNext(WordKey& key, int use_uniq);
00131   //-
00132   // Compare the document fields defined in both <b>a</b> and <b>b</b>
00133   // and return the difference a - b, as in strcmp. If all document
00134   // fields in <b>a</b> or <b>b</b> are undefined return 1.
00135   //
00136   int DocumentCompare(const WordKey& a, const WordKey& b);
00137   //-
00138   // Set all document fields to 0. Return value presumably OK/NOTOK (TODO confirm).
00139   //
00140   int DocumentClear(WordKey& key);
00141   //-
00142   // Set all document fields to undefined. Return value presumably OK/NOTOK (TODO confirm).
00143   //
00144   int DocumentUndefined(WordKey& key);
00145   //-
00146   // Return the index of the first document field. The document must be defined: unlike HasRealm there is no null check.
00147   //
00148   inline int DocumentOffset() const { return document[0]; }
00149   //-
00150   // Return the length of the document fields.
00151   //
00152   inline int DocumentLength() const { return document_length; }
00153 
00154   //
00155   // These functions and only these know what a location is. 
00156   // This should really be a class containing function pointers and be
00157   // given as argument to the search algorithm.
00158   //
00159   //-
00160   // Copy the document and location in <b>from</b> into <b>to.</b>
00161   //
00162   void LocationSet(const WordKey& from, WordKey& to);
00163   //-
00164   // Increment the document and location in <b>key</b> 
00165   // using the <i>SetToFollowing</i>
00166   // method of WordKey. 
00167   //
00168   void LocationNext(WordKey& key);
00169   //-
00170   // Compare <b>expected</b> location to <b>actual</b> location. Compares equal
00171   // as long as expected location is at a maximum distance of <b>proximity</b>
00172   // of actual. If <b>actual</b> only has undefined fields, return > 0.
00173   // <b>expected</b> must always be the lowest possible bound.
00174   // <b>actual</b> is tolerated if it is greater than <b>expected</b> but not
00175   // greater than <b>proximity</b> if <b>proximity</b> > 0 or abs(<b>proximity</b>) * 2 if
00176   // <b>proximity</b> < 0.
00177   // Return the difference expected - actual.
00178   //
00179   int  LocationCompare(const WordKey& expected, const WordKey& actual, int proximity = 0);
00180   //-
00181   // <b>key</b> is the expected location of a searched key. 
00182   // LocationNearLowest modifies <b>key</b> to add tolerance according to
00183   // <b>proximity</b>. 
00184   //
00185   // The idea is that <b>key</b> will be the lowest possible match
00186   // for the <b>proximity</b> range. If <b>proximity</b> is positive, <b>key</b>
00187   // is already the lowest possible match since we accept [0 proximity].
00188   // If <b>proximity</b> is negative, subtract it since we accept
00189   // [-proximity proximity].
00190   //
00191   // For better understanding see the functions in which it is used.
00192   //
00193   void LocationNearLowest(WordKey& key, int proximity);
00194 
00195   //-
00196   // Undefine the location field in <b>key</b>.
00197   //
00198   void Location2Document(WordKey& key);
00199 
00200   inline int Verbose(int verbosity) { return verbose = verbosity; } // store the new verbosity level and return it
00201 
00202 protected:
00203   int* document;        // document field positions; null means not defined (checked by HasRealm)
00204   int document_length;  // value returned by DocumentLength()
00205   int location;         // presumably the location field position given to Initialize (TODO confirm)
00206   int uniq;             // value returned by Uniq()
00207   WordContext *context; // presumably the context given to the constructor (TODO confirm)
00208 
00209   int verbose;          // verbosity level, set by Verbose()
00210 };
00211 
00212 #endif /* _WordKeySemantic_h */

Generated on Sun Jun 8 10:56:40 2008 for GNUmifluz by  doxygen 1.5.5