00001 /*- 00002 * See the file LICENSE for redistribution information. 00003 * 00004 * Copyright (c) 1996, 1997, 1998, 1999, 2000 00005 * Sleepycat Software. All rights reserved. 00006 * 00007 * $Id: mp_8h-source.html,v 1.1 2008/06/08 10:20:35 sebdiaz Exp $ 00008 */ 00009 00010 struct __bh; typedef struct __bh BH; 00011 struct __db_mpool; typedef struct __db_mpool DB_MPOOL; 00012 struct __db_mpreg; typedef struct __db_mpreg DB_MPREG; 00013 struct __mpool; typedef struct __mpool MPOOL; 00014 struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; 00015 struct __cmpr; typedef struct __cmpr CMPR; 00016 00017 /* We require at least 40K of cache. */ 00018 #define DB_CACHESIZE_MIN (20 * 1024) 00019 00020 /* 00021 * DB_MPOOL -- 00022 * Per-process memory pool structure. 00023 */ 00024 struct __db_mpool { 00025 /* These fields need to be protected for multi-threaded support. */ 00026 MUTEX *mutexp; /* Structure thread lock. */ 00027 00028 /* List of pgin/pgout routines. */ 00029 LIST_HEAD(__db_mpregh, __db_mpreg) dbregq; 00030 00031 /* List of DB_MPOOLFILE's. */ 00032 TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq; 00033 00034 /* These fields are not thread-protected. */ 00035 DB_ENV *dbenv; /* Reference to error information. */ 00036 00037 u_int32_t nreg; /* N underlying cache regions. */ 00038 REGINFO *reginfo; /* Underlying cache regions. */ 00039 }; 00040 00041 /* 00042 * DB_MPREG -- 00043 * DB_MPOOL registry of pgin/pgout functions. 00044 */ 00045 struct __db_mpreg { 00046 LIST_ENTRY(__db_mpreg) q; /* Linked list. */ 00047 00048 int ftype; /* File type. */ 00049 /* Pgin, pgout routines. */ 00050 int (*pgin) __P((DB_ENV *, db_pgno_t, void *, DBT *)); 00051 int (*pgout) __P((DB_ENV *, db_pgno_t, void *, DBT *)); 00052 }; 00053 00054 /* 00055 * DB_MPOOLFILE -- 00056 * Per-process DB_MPOOLFILE information. 00057 */ 00058 struct __db_mpoolfile { 00059 /* These fields need to be protected for multi-threaded support. */ 00060 MUTEX *mutexp; /* Structure thread lock. */ 00061 00062 DB_FH fh; /* Underlying file handle. */ 00063 00064 u_int32_t ref; /* Reference count. */ 00065 00066 /* 00067 * !!! 00068 * This field is a special case -- it's protected by the region lock 00069 * NOT the thread lock. The reason for this is that we always have 00070 * the region lock immediately before or after we modify the field, 00071 * and we don't want to use the structure lock to protect it because 00072 * then I/O (which is done with the structure lock held because of 00073 * the race between the seek and write of the file descriptor) will 00074 * block any other put/get calls using this DB_MPOOLFILE structure. 00075 */ 00076 u_int32_t pinref; /* Pinned block reference count. */ 00077 00078 /* 00079 * !!! 00080 * This field is a special case -- it's protected by the region lock 00081 * since it's manipulated only when new files are added to the list. 00082 */ 00083 TAILQ_ENTRY(__db_mpoolfile) q; /* Linked list of DB_MPOOLFILE's. */ 00084 00085 /* These fields are not thread-protected. */ 00086 DB_MPOOL *dbmp; /* Overlying DB_MPOOL. */ 00087 MPOOLFILE *mfp; /* Underlying MPOOLFILE. */ 00088 00089 void *addr; /* Address of mmap'd region. */ 00090 size_t len; /* Length of mmap'd region. */ 00091 00092 /* These fields need to be protected for multi-threaded support. */ 00093 #define MP_READONLY 0x01 /* File is readonly. */ 00094 #define MP_UPGRADE 0x02 /* File descriptor is readwrite. */ 00095 #define MP_UPGRADE_FAIL 0x04 /* Upgrade wasn't possible. */ 00096 #define MP_CMPR 0x08 /* Transparent I/O compression. */ 00097 u_int32_t flags; 00098 }; 00099 00100 /* 00101 * NCACHE -- 00102 * Select a cache based on the page number. This assumes accesses are 00103 * uniform across pages, which is probably OK -- what we really want to 00104 * avoid is anything that puts all the pages for any single file in the 00105 * same cache, as we expect that file access will be bursty. 00106 */ 00107 #define NCACHE(mp, pgno) \ 00108 ((pgno) % ((MPOOL *)mp)->nreg) 00109 00110 /* 00111 * NBUCKET -- 00112 * We make the assumption that early pages of the file are more likely 00113 * to be retrieved than the later pages, which means the top bits will 00114 * be more interesting for hashing as they're less likely to collide. 00115 * That said, as 512 8K pages represents a 4MB file, so only reasonably 00116 * large files will have page numbers with any other than the bottom 9 00117 * bits set. We XOR in the MPOOL offset of the MPOOLFILE that backs the 00118 * page, since that should also be unique for the page. We don't want 00119 * to do anything very fancy -- speed is more important to us than using 00120 * good hashing. 00121 */ 00122 #define NBUCKET(mc, mf_offset, pgno) \ 00123 (((pgno) ^ ((mf_offset) << 9)) % (mc)->htab_buckets) 00124 00125 /* 00126 * MPOOL -- 00127 * Shared memory pool region. 00128 */ 00129 struct __mpool { 00130 /* 00131 * The memory pool can be broken up into individual pieces/files. 00132 * Not what we would have liked, but on Solaris you can allocate 00133 * only a little more than 2GB of memory in a contiguous chunk, 00134 * and I expect to see more systems with similar issues. 00135 * 00136 * The first of these pieces/files describes the entire pool, all 00137 * subsequent ones only describe a part of the cache. 00138 * 00139 * We single-thread CDB_memp_sync and CDB_memp_fsync calls. 00140 * 00141 * This mutex is intended *only* to single-thread access to the call, 00142 * it is not used to protect the lsn and lsn_cnt fields, the region 00143 * lock is used to protect them. 00144 */ 00145 MUTEX sync_mutex; /* Checkpoint lock. */ 00146 DB_LSN lsn; /* Maximum checkpoint LSN. */ 00147 u_int32_t lsn_cnt; /* Checkpoint buffers left to write. */ 00148 00149 SH_TAILQ_HEAD(__mpfq) mpfq; /* List of MPOOLFILEs. */ 00150 00151 u_int32_t nreg; /* Number of underlying REGIONS. */ 00152 roff_t regids; /* Array of underlying REGION Ids. */ 00153 00154 #define MP_LSN_RETRY 0x01 /* Retry all BH_WRITE buffers. */ 00155 u_int32_t flags; 00156 00157 /* 00158 * The following structure fields only describe the cache portion of 00159 * the region. 00160 */ 00161 SH_TAILQ_HEAD(__bhq) bhq; /* LRU list of buffer headers. */ 00162 00163 int htab_buckets; /* Number of hash table entries. */ 00164 roff_t htab; /* Hash table offset. */ 00165 00166 DB_MPOOL_STAT stat; /* Per-cache mpool statistics. */ 00167 }; 00168 00169 /* 00170 * MPOOLFILE -- 00171 * Shared DB_MPOOLFILE information. 00172 */ 00173 struct __mpoolfile { 00174 SH_TAILQ_ENTRY q; /* List of MPOOLFILEs */ 00175 00176 db_pgno_t ref_cnt; /* Ref count: pages or DB_MPOOLFILEs. */ 00177 db_pgno_t lsn_cnt; /* Checkpoint buffers left to write. */ 00178 00179 int ftype; /* File type. */ 00180 int32_t lsn_off; /* Page's LSN offset. */ 00181 u_int32_t clear_len; /* Bytes to clear on page create. */ 00182 00183 roff_t path_off; /* File name location. */ 00184 roff_t fileid_off; /* File identification location. */ 00185 00186 roff_t pgcookie_len; /* Pgin/pgout cookie length. */ 00187 roff_t pgcookie_off; /* Pgin/pgout cookie location. */ 00188 00189 db_pgno_t last_pgno; /* Last page in the file. */ 00190 db_pgno_t orig_last_pgno; /* Original last page in the file. */ 00191 00192 db_pgno_t cmpr_free; /* Compression free list. */ 00193 00194 DB_MPOOL_FSTAT stat; /* Per-file mpool statistics. */ 00195 00196 #define MP_CAN_MMAP 0x01 /* If the file can be mmap'd. */ 00197 #define MP_DEADFILE 0x02 /* Dirty pages can simply be trashed. */ 00198 #define MP_TEMP 0x04 /* Backing file is a temporary. */ 00199 u_int32_t flags; 00200 }; 00201 00202 /* 00203 * BH_TO_CACHE -- 00204 * Return the cache where we can find the specified buffer header. 00205 */ 00206 #define BH_TO_CACHE(dbmp, bhp) \ 00207 (dbmp)->reginfo[NCACHE((dbmp)->reginfo[0].primary, (bhp)->pgno)].primary 00208 00209 /* 00210 * DB_CMPR -- 00211 * Page compression information 00212 * 00213 * !!! 00214 * There is no need to keep the length of the data wrote 00215 * in the page since it's already encoded in the compressed 00216 * data. 00217 */ 00218 00219 /* 00220 * Convert size to expected compressed size 00221 */ 00222 #define DB_CMPR_DIVIDE(dbenv, size) ((size) >> CDB___memp_cmpr_coefficient(dbenv) ) 00223 #define DB_CMPR_MULTIPLY(dbenv, size) ((size) << CDB___memp_cmpr_coefficient(dbenv) ) 00224 00225 struct __cmpr { 00226 #define DB_CMPR_FIRST 0x01 /* Head of chain. */ 00227 #define DB_CMPR_INTERNAL 0x02 /* Weak compression data. */ 00228 #define DB_CMPR_CHAIN 0x04 /* More data in next page. */ 00229 #define DB_CMPR_FREE 0x08 /* Not in use. */ 00230 00231 u_int16_t flags; 00232 00233 /* 00234 * Filled if DB_CMPR_CHAIN set 00235 */ 00236 db_pgno_t next; 00237 }; 00238 00239 /* 00240 * Reserved information at the beginning of each compressed page 00241 */ 00242 #define DB_CMPR_OVERHEAD sizeof(struct __cmpr) 00243 /* 00244 * Size of IO page, without the reserved information 00245 */ 00246 #define DB_CMPR_PAGESIZE(io) (io->pagesize - DB_CMPR_OVERHEAD) 00247 /* 00248 * Pointer to data within raw compressed buffer 00249 */ 00250 #define DB_CMPR_DATA(io) (io->buf + DB_CMPR_OVERHEAD) 00251 00252 /* 00253 * BH -- 00254 * Buffer header. 00255 */ 00256 struct __bh { 00257 MUTEX mutex; /* Buffer thread/process lock. */ 00258 00259 u_int16_t ref; /* Reference count. */ 00260 00261 #define BH_CALLPGIN 0x001 /* Page needs to be reworked... */ 00262 #define BH_DIRTY 0x002 /* Page was modified. */ 00263 #define BH_DISCARD 0x004 /* Page is useless. */ 00264 #define BH_LOCKED 0x008 /* Page is locked (I/O in progress). */ 00265 #define BH_TRASH 0x010 /* Page is garbage. */ 00266 #define BH_WRITE 0x020 /* Page scheduled for writing. */ 00267 #define BH_CMPR 0x040 /* Chain contains valid data. */ 00268 #define BH_CMPR_POOL 0x080 /* Chain allocated in pool. */ 00269 #define BH_CMPR_OS 0x100 /* Chain allocate with malloc. */ 00270 u_int16_t flags; 00271 00272 db_pgno_t *chain; /* Compression chain. */ 00273 00274 SH_TAILQ_ENTRY q; /* LRU queue. */ 00275 SH_TAILQ_ENTRY hq; /* MPOOL hash bucket queue. */ 00276 00277 db_pgno_t pgno; /* Underlying MPOOLFILE page number. */ 00278 roff_t mf_offset; /* Associated MPOOLFILE offset. */ 00279 00280 /* 00281 * !!! 00282 * This array must be size_t aligned -- the DB access methods put PAGE 00283 * and other structures into it, and expect to be able to access them 00284 * directly. (We guarantee size_t alignment in the documentation too.) 00285 */ 00286 u_int8_t buf[1]; /* Variable length data. */ 00287 }; 00288 00289 #include "mp_ext.h"