00001 // 00002 // Part of the ht://Dig package <http://www.htdig.org/> 00003 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group 00004 // For copyright details, see the file COPYING in your distribution 00005 // or the GNU General Public License version 2 or later 00006 // <http://www.gnu.org/copyleft/gpl.html> 00007 // 00008 // $Id: WordKeySemantic_8h-source.html,v 1.1 2008/06/08 10:13:12 sebdiaz Exp $ 00009 // 00010 // ************************* Document definition implementation *********** 00011 00012 #ifdef HAVE_CONFIG_H 00013 #include <config.h> 00014 #endif /* HAVE_CONFIG_H */ 00015 00016 #ifdef HAVE_UNISTD_H 00017 #include <unistd.h> 00018 #endif /* HAVE_UNISTD_H */ 00019 00020 #ifndef _WordKeySemantic_h 00021 #define _WordKeySemantic_h 00022 00023 #include <WordContext.h> 00024 00025 // *********************** WordKeySemantic implementation ******************** 00026 // 00027 // NAME 00028 // 00029 // encapsulate WordKey semantic for document and location 00030 // 00031 // SYNOPSIS 00032 // 00033 // #include <WordKeySemantic.h> 00034 // 00035 // #define SERVER 1 00036 // #define URL 2 00037 // #define LOCATION 3 00038 // 00039 // static int document[] = { 00040 // SERVER, 00041 // URL 00042 // }; 00043 // 00044 // WordKeySemantic semantic; 00045 // semantic.Initialize(document, sizeof(document)/sizeof(int), LOCATION); 00046 // 00047 // DESCRIPTION 00048 // 00049 // Encapsulate the semantic of a WordKey object fields. It defines 00050 // what a document and a location are. It implements the set of 00051 // operation that a search needs to perform given the fact that it 00052 // implements a search whose purpose is to retrieve a document and 00053 // wants to implement proximity search based on a word location. 00054 // 00055 // 00056 // END 00057 // 00058 // A document is a set of fields in a given order. 00059 // A location is a field. 00060 // The actual fields used to implement WordKeySemantic methods are 00061 // set with the Initialize method. 00062 // 00063 class WordKeySemantic { 00064 public: 00065 WordKeySemantic(WordContext *ncontext); 00066 ~WordKeySemantic(); 00067 00068 //- 00069 // Set the actual field numbers that define what a document is and 00070 // what a location is. The <b>document_arg<b> is a list of WordKey field 00071 // positions of length <b>document_length_arg</b> that must be adjacent. 00072 // The <b>location_arg</b> is the WordKey field position of the word 00073 // location within a document. 00074 // Return OK on success, NOTOK on failure. 00075 // 00076 int Initialize(int* document_arg, int document_length_arg, int location_arg, int uniq_arg); 00077 00078 // 00079 // A realm exists if there is data between the word and the document. 00080 // 00081 //- 00082 // Return 1 if document is at the beginning of the key, 0 otherwise. 00083 // For instance a document that is defined as 1 2 3 is at the beginning 00084 // of a key and a document that is defined as 2 3 is not at the beginning 00085 // of a key. 00086 // 00087 int HasRealm() { 00088 if(!document) { 00089 fprintf(stderr, "WordKeySemantic::HasRealm: document is not defined\n"); 00090 return 0; 00091 } 00092 return document[0] != 1; 00093 } 00094 //- 00095 // Clear <b>to</b> and copy the realm in <b>from</b> into <b>to.</b> 00096 // 00097 void RealmSet(const WordKey& from, WordKey& to); 00098 //- 00099 // Undefine the realm fields of <b>key</b>. 00100 // 00101 void RealmUndefined(WordKey& key); 00102 //- 00103 // Copy the realm in <b>from</b> into <b>to.</b> 00104 // 00105 void RealmCopy(const WordKey& from, WordKey& to); 00106 //- 00107 // Clear key and set all realm fields to 0. 00108 // 00109 void RealmClear(WordKey& key); 00110 00111 //- 00112 // Return the index of the uniq field. It must be in the range of 00113 // indexes defined for the document. 00114 // 00115 inline int Uniq() { return uniq; } 00116 00117 //- 00118 // Clear <b>to</b> and copy the document in <b>from</b> into <b>to.</b> 00119 // 00120 void DocumentSet(const WordKey& from, WordKey& to); 00121 //- 00122 // Copy the document in <b>from</b> into <b>to.</b> 00123 // 00124 void DocumentCopy(const WordKey& from, WordKey& to); 00125 //- 00126 // Increment the document in <b>key</b> using the <i>SetToFollowing</i> 00127 // method of WordKey. <b>uniq</b> is the WordKey position at which the 00128 // increment starts. 00129 // 00130 void DocumentNext(WordKey& key, int use_uniq); 00131 //- 00132 // Compare the document fields defined in both <b>a</b> and <b>b</b> 00133 // and return the difference a - b, as in strcmp. If all document 00134 // fields in <b>a</b> or <b>b</b> are undefined return 1. 00135 // 00136 int DocumentCompare(const WordKey& a, const WordKey& b); 00137 //- 00138 // Set all document fields to 0. 00139 // 00140 int DocumentClear(WordKey& key); 00141 //- 00142 // Set all document fields to undefined. 00143 // 00144 int DocumentUndefined(WordKey& key); 00145 //- 00146 // Return the index of the first document field. 00147 // 00148 inline int DocumentOffset() const { return document[0]; } 00149 //- 00150 // Return the length of the document fields. 00151 // 00152 inline int DocumentLength() const { return document_length; } 00153 00154 // 00155 // These functions and only these know what a location is. 00156 // This should really be a class containing function pointers and be 00157 // given as argument to the search algorithm. 00158 // 00159 //- 00160 // Copy the document and location in <b>from</b> into <b>to.</b> 00161 // 00162 void LocationSet(const WordKey& from, WordKey& to); 00163 //- 00164 // Increment the document and location in <b>key</b> 00165 // using the <i>SetToFollowing</i> 00166 // method of WordKey. 00167 // 00168 void LocationNext(WordKey& key); 00169 //- 00170 // Compare <b>expected</b> location to <b>actual</b> location. Compares equal 00171 // as long as expected location is at a maximum distance of <b>proximity</b> 00172 // of actual. If <b>actual</b> only has undefined field, return > 0. 00173 // <b>expected</b> must always be the lowest possible bound. 00174 // <b>actual</b> is tolerated if it is greater than <b>actual</b> but not 00175 // greater than <b>proximity</b> if <b>proximity</b> > 0 or abs(<b>proximity</b>) * 2 if 00176 // <b>proximity</b> < 0. 00177 // Return the difference expected - actual. 00178 // 00179 int LocationCompare(const WordKey& expected, const WordKey& actual, int proximity = 0); 00180 //- 00181 // <b>key</b> is the expected location of a searched key. 00182 // LocationNearLowest modifies <b>key</b> to add tolerance accroding to 00183 // <b>proximity</b>. 00184 // 00185 // The idea is that <b>key</b> will be the lowest possible match for 00186 // for the <b>proximity</b> range. If <proxmity> is positive, <b>key</b> 00187 // is already the lowest possible match since we accept [0 proximity]. 00188 // If <b>proximity</b> is negative, substract it since we accept 00189 // [-proximity proximity]. 00190 // 00191 // For better understanding see the functions in which it is used. 00192 // 00193 void LocationNearLowest(WordKey& key, int proximity); 00194 00195 //- 00196 // Undefine the location field in <b>key.</b>. 00197 // 00198 void Location2Document(WordKey& key); 00199 00200 inline int Verbose(int verbosity) { return verbose = verbosity; } 00201 00202 protected: 00203 int* document; 00204 int document_length; 00205 int location; 00206 int uniq; 00207 WordContext *context; 00208 00209 int verbose; 00210 }; 00211 00212 #endif /* _WordKeySemantic_h */