00001 // 00002 // mifluz.h 00003 // 00004 // NAME 00005 // C++ library to use and manage inverted indexes 00006 // 00007 // SYNOPSIS 00008 // #include <mifluz.h> 00009 // 00010 // main() 00011 // { 00012 // Configuration* config = WordContext::Initialize(); 00013 // 00014 // WordList* words = new WordList(*config); 00015 // 00016 // ... 00017 // 00018 // delete words; 00019 // 00020 // WordContext::Finish(); 00021 // } 00022 // 00023 // DESCRIPTION 00024 // 00025 // The purpose of <i>mifluz</i> is to provide a C++ library to build and query a 00026 // full-text inverted index. It is dynamically updatable, scalable (up to 00027 // 1Tb indexes), uses a controlled amount of memory, shares index files 00028 // and memory cache among processes or threads and compresses index files 00029 // to 50% of the raw data. The structure of the index is configurable at 00030 // runtime and allows inclusion of relevance ranking information. The 00031 // query functions do not require loading all the occurrences of a 00032 // searched term. They consume very few resources and many searches can 00033 // be run in parallel. 00034 // 00035 // The file management library used in mifluz is a modified Berkeley DB 00036 // (www.sleepycat.com) version 3.1.14. 00037 // 00038 // CONFIGURATION 00039 // 00040 // wordlist_wordkey_document [field ...] (default none) 00041 // A whitespace-separated list of field numbers that define a document. 00042 // The field number list must not contain gaps. For instance 1 2 3 is 00043 // valid but 1 3 4 is not valid. 00044 // This configuration parameter is not used by the mifluz library 00045 // but may be used by a query application to define the semantics of 00046 // a document. In response to a query, the application will return a 00047 // list of results in which only distinct documents will be shown. 
00048 // 00049 // wordlist_wordkey_location field (default none) 00050 // A single field number that contains the position of a word in a 00051 // given document. 00052 // This configuration parameter is not used by the mifluz library 00053 // but may be used by a query application. 00054 // 00055 // wordlist_wordkey_uniq field (default none) 00056 // A single field number on which a uniq sort of a document list 00057 // will be done. This field is used by mifluzsearch(1) to reduce 00058 // the list of matching URLs to one URL per server, for instance. 00059 // 00060 // LOCALE SUPPORT 00061 // 00062 // Words are compared using the strcoll(3) function when inserted and 00063 // searched in the index. See setlocale(3) for more information on 00064 // locale support. Only 8-bit character sets are supported. 00065 // 00066 // ENVIRONMENT 00067 // 00068 // <b>MIFLUZ_CONFIG</b> file name of configuration file read by 00069 // WordContext(3). Defaults to <b>~/.mifluz.</b> or <b>/usr/etc/mifluz.conf</b> 00070 // 00071 // END 00072 // 00073 // Part of the ht://Dig package <http://www.htdig.org/> 00074 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group 00075 // For copyright details, see the file COPYING in your distribution 00076 // or the GNU General Public License version 2 or later 00077 // <http://www.gnu.org/copyleft/gpl.html> 00078 // 00079 // $Id: mifluz_8h-source.html,v 1.1 2008/06/08 10:20:32 sebdiaz Exp $ 00080 // 00081 00082 #ifndef _mifluz_h_ 00083 #define _mifluz_h_ 00084 00085 #include <mifluz/config.h> 00086 #include <mifluz/WordContext.h> 00087 #include <mifluz/WordList.h> 00088 00089 #endif /* _mifluz_h_ */