00001 /*- 00002 * See the file LICENSE for redistribution information. 00003 * 00004 * Copyright (c) 1996, 1997, 1998, 1999, 2000 00005 * Sleepycat Software. All rights reserved. 00006 */ 00007 /* 00008 * Copyright (c) 1990, 1993, 1994, 1995, 1996 00009 * Keith Bostic. All rights reserved. 00010 */ 00011 /* 00012 * Copyright (c) 1990, 1993, 1994, 1995 00013 * The Regents of the University of California. All rights reserved. 00014 * 00015 * This code is derived from software contributed to Berkeley by 00016 * Mike Olson. 00017 * 00018 * Redistribution and use in source and binary forms, with or without 00019 * modification, are permitted provided that the following conditions 00020 * are met: 00021 * 1. Redistributions of source code must retain the above copyright 00022 * notice, this list of conditions and the following disclaimer. 00023 * 2. Redistributions in binary form must reproduce the above copyright 00024 * notice, this list of conditions and the following disclaimer in the 00025 * documentation and/or other materials provided with the distribution. 00026 * 3. Neither the name of the University nor the names of its contributors 00027 * may be used to endorse or promote products derived from this software 00028 * without specific prior written permission. 00029 * 00030 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 00031 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00032 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00033 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 00034 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00035 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00036 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00037 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00038 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00039 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00040 * SUCH DAMAGE. 00041 * 00042 * $Id: btree_8h-source.html,v 1.1 2008/06/08 10:13:49 sebdiaz Exp $ 00043 */ 00044 00045 /* Forward structure declarations. */ 00046 struct __btree; typedef struct __btree BTREE; 00047 struct __cursor; typedef struct __cursor BTREE_CURSOR; 00048 struct __epg; typedef struct __epg EPG; 00049 struct __recno; typedef struct __recno RECNO; 00050 00051 #define DEFMINKEYPAGE (2) 00052 00053 #define ISINTERNAL(p) (TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) 00054 #define ISLEAF(p) (TYPE(p) == P_LBTREE || \ 00055 TYPE(p) == P_LRECNO || TYPE(p) == P_LDUP) 00056 00057 /* Flags for CDB___bam_cadjust_log(). */ 00058 #define CAD_UPDATEROOT 0x01 /* Root page count was updated. */ 00059 00060 /* Flags for CDB___bam_split_log(). */ 00061 #define SPL_NRECS 0x01 /* Split tree has record count. */ 00062 00063 /* Flags for CDB___bam_iitem(). */ 00064 #define BI_DELETED 0x01 /* Key/data pair only placeholder. */ 00065 00066 /* Flags for CDB___bam_stkrel(). */ 00067 #define STK_CLRDBC 0x01 /* Clear dbc->page reference. */ 00068 #define STK_NOLOCK 0x02 /* Don't retain locks. */ 00069 00070 /* Flags for __ram_ca(). */ 00071 typedef enum { 00072 CA_DELETE, 00073 CA_IAFTER, 00074 CA_IBEFORE 00075 } ca_recno_arg; 00076 00077 /* 00078 * Flags for CDB___bam_search() and CDB___bam_rsearch(). 00079 * 00080 * Note, internal page searches must find the largest record less than key in 00081 * the tree so that descents work. Leaf page searches must find the smallest 00082 * record greater than key so that the returned index is the record's correct 00083 * position for insertion. 00084 * 00085 * The flags parameter to the search routines describes three aspects of the 00086 * search: the type of locking required (including if we're locking a pair of 00087 * pages), the item to return in the presence of duplicates and whether or not 00088 * to return deleted entries. To simplify both the mnemonic representation 00089 * and the code that checks for various cases, we construct a set of bitmasks. 00090 */ 00091 #define S_READ 0x00001 /* Read locks. */ 00092 #define S_WRITE 0x00002 /* Write locks. */ 00093 00094 #define S_APPEND 0x00040 /* Append to the tree. */ 00095 #define S_DELNO 0x00080 /* Don't return deleted items. */ 00096 #define S_DUPFIRST 0x00100 /* Return first duplicate. */ 00097 #define S_DUPLAST 0x00200 /* Return last duplicate. */ 00098 #define S_EXACT 0x00400 /* Exact items only. */ 00099 #define S_PARENT 0x00800 /* Lock page pair. */ 00100 #define S_STACK 0x01000 /* Need a complete stack. */ 00101 #define S_PAST_EOF 0x02000 /* If doing insert search (or keyfirst 00102 * or keylast operations), or a split 00103 * on behalf of an insert, it's okay to 00104 * return an entry one past end-of-page. 00105 */ 00106 #define S_STK_ONLY 0x04000 /* Just return info in the stack */ 00107 00108 #define S_DELETE (S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK) 00109 #define S_FIND (S_READ | S_DUPFIRST | S_DELNO) 00110 #define S_FIND_WR (S_WRITE | S_DUPFIRST | S_DELNO) 00111 #define S_INSERT (S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK) 00112 #define S_KEYFIRST (S_WRITE | S_DUPFIRST | S_PAST_EOF | S_STACK) 00113 #define S_KEYLAST (S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK) 00114 #define S_WRPAIR (S_WRITE | S_DUPLAST | S_PAST_EOF | S_PARENT) 00115 00116 /* 00117 * Various routines pass around page references. A page reference is 00118 * a pointer to the page, and the indx indicates an item on the page. 00119 * Each page reference may include a lock. 00120 */ 00121 struct __epg { 00122 PAGE *page; /* The page. */ 00123 db_indx_t indx; /* The index on the page. */ 00124 db_indx_t entries; /* The number of entries on page */ 00125 DB_LOCK lock; /* The page's lock. */ 00126 db_lockmode_t lock_mode; /* The lock mode. */ 00127 }; 00128 00129 /* 00130 * We maintain a stack of the pages that we're locking in the tree. Grow 00131 * the stack as necessary. 00132 */ 00133 #define BT_STK_CLR(c) \ 00134 ((c)->csp = (c)->sp) 00135 00136 #define BT_STK_ENTER(dbenv, c, pagep, page_indx, l, mode, ret) do { \ 00137 if ((ret = \ 00138 (c)->csp == (c)->esp ? CDB___bam_stkgrow(dbenv, c) : 0) == 0) { \ 00139 (c)->csp->page = pagep; \ 00140 (c)->csp->indx = page_indx; \ 00141 (c)->csp->entries = NUM_ENT(pagep); \ 00142 (c)->csp->lock = l; \ 00143 (c)->csp->lock_mode = mode; \ 00144 } \ 00145 } while (0) 00146 00147 #define BT_STK_PUSH(dbenv, c, pagep, page_indx, lock, mode, ret) do { \ 00148 BT_STK_ENTER(dbenv, c, pagep, page_indx, lock, mode, ret); \ 00149 ++(c)->csp; \ 00150 } while (0) 00151 00152 #define BT_STK_NUM(dbenv, c, pagep, page_indx, ret) do { \ 00153 if ((ret = \ 00154 (c)->csp == (c)->esp ? CDB___bam_stkgrow(dbenv, c) : 0) == 0) { \ 00155 (c)->csp->page = NULL; \ 00156 (c)->csp->indx = page_indx; \ 00157 (c)->csp->entries = NUM_ENT(pagep); \ 00158 (c)->csp->lock.off = LOCK_INVALID; \ 00159 (c)->csp->lock_mode = DB_LOCK_NG; \ 00160 } \ 00161 } while (0) 00162 00163 #define BT_STK_NUMPUSH(dbenv, c, pagep, page_indx,ret) do { \ 00164 BT_STK_NUM(dbenv, cp, pagep, page_indx, ret); \ 00165 ++(c)->csp; \ 00166 } while (0) 00167 00168 #define BT_STK_POP(c) \ 00169 ((c)->csp == (c)->stack ? NULL : --(c)->csp) 00170 00171 /* Btree/Recno cursor. */ 00172 struct __cursor { 00173 /* struct __dbc_internal */ 00174 __DBC_INTERNAL 00175 00176 /* btree private part */ 00177 EPG *sp; /* Stack pointer. */ 00178 EPG *csp; /* Current stack entry. */ 00179 EPG *esp; /* End stack pointer. */ 00180 EPG stack[5]; 00181 00182 db_indx_t ovflsize; /* Maximum key/data on-page size. */ 00183 00184 db_recno_t recno; /* Current record number. */ 00185 00186 /* 00187 * Btree: 00188 * We set a flag in the cursor structure if the underlying object has 00189 * been deleted. It's not strictly necessary, we could get the same 00190 * information by looking at the page itself, but this method doesn't 00191 * require us to retrieve the page on cursor delete. 00192 * 00193 * Recno: 00194 * When renumbering recno databases during deletes, cursors referencing 00195 * "deleted" records end up positioned between two records, and so must 00196 * be specially adjusted on the next operation. 00197 */ 00198 #define C_DELETED 0x0001 /* Record was deleted. */ 00199 /* 00200 * There are three tree types that require maintaining record numbers. 00201 * Recno AM trees, Btree AM trees for which the DB_RECNUM flag was set, 00202 * and Btree off-page duplicate trees. 00203 */ 00204 #define C_RECNUM 0x0002 /* Tree requires record counts. */ 00205 /* 00206 * Recno trees have immutable record numbers by default, but optionally 00207 * support mutable record numbers. Off-page duplicate Recno trees have 00208 * mutable record numbers. All Btrees with record numbers (including 00209 * off-page duplicate trees) are mutable by design, no flag is needed. 00210 */ 00211 #define C_RENUMBER 0x0004 /* Tree records are mutable. */ 00212 u_int32_t flags; 00213 }; 00214 00215 /* 00216 * The in-memory, per-tree btree/recno data structure. 00217 */ 00218 struct __btree { /* Btree access method. */ 00219 db_pgno_t bt_lpgno; /* Last insert location. */ 00220 00221 db_pgno_t bt_meta; /* Database meta-data page. */ 00222 db_pgno_t bt_root; /* Database root page. */ 00223 00224 u_int32_t bt_maxkey; /* Maximum keys per page. */ 00225 u_int32_t bt_minkey; /* Minimum keys per page. */ 00226 00227 /* Btree comparison function. */ 00228 int (*bt_compare) __P((const DBT *, const DBT *)); 00229 /* Btree prefix function. */ 00230 size_t (*bt_prefix) __P((const DBT *, const DBT *)); 00231 00232 /* Recno access method. */ 00233 int re_pad; /* Fixed-length padding byte. */ 00234 int re_delim; /* Variable-length delimiting byte. */ 00235 u_int32_t re_len; /* Length for fixed-length records. */ 00236 char *re_source; /* Source file name. */ 00237 00238 /* 00239 * !!! 00240 * These fields are ignored as far as multi-threading is concerned. 00241 * There are no transaction semantics associated with backing files, 00242 * nor is there any thread protection. 00243 */ 00244 DB_FH re_fh; /* Source file handle. */ 00245 db_recno_t re_last; /* Last record number read. */ 00246 void *re_cmap; /* Current point in mapped space. */ 00247 void *re_smap; /* Start of mapped space. */ 00248 void *re_emap; /* End of mapped space. */ 00249 size_t re_msize; /* Size of mapped region. */ 00250 /* Recno input function. */ 00251 int (*re_irec) __P((DBC *, db_recno_t)); 00252 00253 #define RECNO_MODIFIED 0x01 /* Tree was modified. */ 00254 #define RECNO_READFILE 0x02 /* Backing source file to read. */ 00255 u_int32_t flags; 00256 }; 00257 00258 #include "btree_auto.h" 00259 #include "btree_ext.h" 00260 #include "db_am.h"