00001
00002
00003
00004
00005
00006
00007 #include "config.h"
00008
00009 #ifndef lint
00010 static const char revid[] = "$Id: mp__bh_8c-source.html,v 1.1 2008/06/08 10:20:39 sebdiaz Exp $";
00011 #endif
00012
00013 #ifndef NO_SYSTEM_INCLUDES
00014 #include <sys/types.h>
00015
00016 #include <errno.h>
00017 #include <string.h>
00018 #include <unistd.h>
00019 #endif
00020
00021 #include "db_int.h"
00022 #include "db_shash.h"
00023 #include "mp.h"
00024 #include "db_page.h"
00025
00026 #ifdef DEBUG
00027 #include "WordMonitor.h"
00028 #endif
00029
00030 static int __memp_upgrade __P((DB_MPOOL *, DB_MPOOLFILE *, MPOOLFILE *));
00031
00032
00033
00034
00035
00036
00037
00038
00039 int
00040 CDB___memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
00041 DB_MPOOL *dbmp;
00042 MPOOLFILE *mfp;
00043 BH *bhp;
00044 int *restartp, *wrotep;
00045 {
00046 DB_MPOOLFILE *dbmfp;
00047 DB_MPREG *mpreg;
00048 int incremented, ret;
00049
00050 if (restartp != NULL)
00051 *restartp = 0;
00052 if (wrotep != NULL)
00053 *wrotep = 0;
00054 incremented = 0;
00055
00056
00057
00058
00059
00060
00061
00062 if (F_ISSET(mfp, MP_DEADFILE)) {
00063 dbmfp = NULL;
00064 goto found;
00065 }
00066
00067
00068
00069
00070
00071
00072
00073 MUTEX_THREAD_LOCK(dbmp->mutexp);
00074 for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
00075 dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q))
00076 if (dbmfp->mfp == mfp) {
00077 if (F_ISSET(dbmfp, MP_READONLY) &&
00078 __memp_upgrade(dbmp, dbmfp, mfp)) {
00079 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00080 return (0);
00081 }
00082
00083
00084
00085
00086
00087 ++dbmfp->ref;
00088 incremented = 1;
00089 break;
00090 }
00091 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00092 if (dbmfp != NULL)
00093 goto found;
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115 if (F_ISSET(mfp, MP_TEMP)) {
00116 DB_ASSERT(!F_ISSET(mfp, MP_DEADFILE));
00117 return (0);
00118 }
00119
00120
00121
00122
00123
00124
00125
00126 if (mfp->ftype != 0) {
00127 MUTEX_THREAD_LOCK(dbmp->mutexp);
00128 for (mpreg = LIST_FIRST(&dbmp->dbregq);
00129 mpreg != NULL; mpreg = LIST_NEXT(mpreg, q))
00130 if (mpreg->ftype == mfp->ftype)
00131 break;
00132 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00133 if (mpreg == NULL)
00134 return (0);
00135 }
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145 if (CDB___memp_fopen(dbmp, mfp, R_ADDR(dbmp->reginfo, mfp->path_off),
00146 0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0)
00147 return (0);
00148
00149 found: ret = CDB___memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep);
00150
00151 if (incremented) {
00152 MUTEX_THREAD_LOCK(dbmp->mutexp);
00153 --dbmfp->ref;
00154 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00155 }
00156
00157 return (ret);
00158 }
00159
00160
00161
00162
00163
00164
00165
00166 int
00167 CDB___memp_pgread(dbmfp, bhp, can_create)
00168 DB_MPOOLFILE *dbmfp;
00169 BH *bhp;
00170 int can_create;
00171 {
00172 DB_IO db_io;
00173 DB_ENV *dbenv;
00174 DB_MPOOL *dbmp;
00175 MPOOLFILE *mfp;
00176 size_t len, pagesize;
00177 size_t nr;
00178 int created, ret;
00179
00180 dbmp = dbmfp->dbmp;
00181 dbenv = dbmp->dbenv;
00182 mfp = dbmfp->mfp;
00183 pagesize = mfp->stat.st_pagesize;
00184
00185 F_SET(bhp, BH_LOCKED | BH_TRASH);
00186 MUTEX_LOCK(&bhp->mutex, dbenv->lockfhp);
00187 R_UNLOCK(dbenv, dbmp->reginfo);
00188
00189
00190
00191
00192
00193 nr = 0;
00194 if (F_ISSET(&dbmfp->fh, DB_FH_VALID)) {
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207 db_io.fhp = &dbmfp->fh;
00208 db_io.mutexp = dbmfp->mutexp;
00209 db_io.pagesize = db_io.bytes = pagesize;
00210 db_io.pgno = bhp->pgno;
00211 db_io.buf = bhp->buf;
00212
00213 if(F_ISSET(dbmfp, MP_CMPR)) {
00214 ret = CDB___memp_cmpr(dbmfp, bhp, &db_io, DB_IO_READ, &nr);
00215 } else {
00216 ret = CDB___os_io(dbenv, &db_io, DB_IO_READ, &nr);
00217 }
00218 #ifdef DEBUG
00219 if(ret == 0) {
00220 PAGE* pp = (PAGE*)db_io.buf;
00221 word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_READ, 1);
00222 switch(TYPE(pp)) {
00223 case P_IBTREE:
00224 word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_IBTREE, 1);
00225 break;
00226 case P_LBTREE:
00227 word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_LBTREE, 1);
00228 break;
00229 default:
00230 word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_UNKNOWN, 1);
00231 break;
00232 }
00233 }
00234 #endif
00235 } else
00236 ret = 0;
00237
00238 created = 0;
00239 if (nr < pagesize) {
00240 if (can_create)
00241 created = 1;
00242 else {
00243
00244
00245
00246
00247
00248
00249
00250 if (ret == 0)
00251 ret = EIO;
00252 goto err;
00253 }
00254 }
00255
00256
00257
00258
00259
00260
00261 if (nr != pagesize) {
00262 len = mfp->clear_len == 0 ? pagesize : mfp->clear_len;
00263 if (nr < len)
00264 memset(bhp->buf + nr, 0, len - nr);
00265 #ifdef DIAGNOSTIC
00266 if (nr > len)
00267 len = nr;
00268 if (len < pagesize)
00269 memset(bhp->buf + len, CLEAR_BYTE, pagesize - len);
00270 #endif
00271 }
00272
00273
00274 ret = mfp->ftype == 0 ? 0 : CDB___memp_pg(dbmfp, bhp, 1);
00275
00276
00277 err: MUTEX_UNLOCK(&bhp->mutex);
00278 R_LOCK(dbenv, dbmp->reginfo);
00279
00280
00281
00282
00283
00284 F_CLR(bhp, BH_LOCKED);
00285 if (ret == 0) {
00286 F_CLR(bhp, BH_TRASH);
00287
00288
00289 if (created)
00290 ++mfp->stat.st_page_create;
00291 else
00292 ++mfp->stat.st_page_in;
00293 }
00294
00295 return (ret);
00296 }
00297
00298
00299
00300
00301
00302
00303
00304
00305 int
00306 CDB___memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep)
00307 DB_MPOOL *dbmp;
00308 DB_MPOOLFILE *dbmfp;
00309 BH *bhp;
00310 int *restartp, *wrotep;
00311 {
00312 DB_ENV *dbenv;
00313 DB_IO db_io;
00314 DB_LSN lsn;
00315 MPOOL *c_mp, *mp;
00316 MPOOLFILE *mfp;
00317 size_t nw;
00318 int callpgin, dosync, ret, syncfail;
00319 const char *fail;
00320
00321 dbenv = dbmp->dbenv;
00322 mp = dbmp->reginfo[0].primary;
00323 mfp = dbmfp == NULL ? NULL : dbmfp->mfp;
00324
00325 if (restartp != NULL)
00326 *restartp = 0;
00327 if (wrotep != NULL)
00328 *wrotep = 0;
00329 callpgin = 0;
00330
00331
00332
00333
00334
00335 if (!F_ISSET(bhp, BH_DIRTY)) {
00336 if (wrotep != NULL)
00337 *wrotep = 1;
00338 return (0);
00339 }
00340
00341 MUTEX_LOCK(&bhp->mutex, dbenv->lockfhp);
00342
00343
00344
00345
00346
00347
00348 if (!F_ISSET(bhp, BH_DIRTY)) {
00349 MUTEX_UNLOCK(&bhp->mutex);
00350
00351 if (wrotep != NULL)
00352 *wrotep = 1;
00353 return (0);
00354 }
00355
00356 F_SET(bhp, BH_LOCKED);
00357 R_UNLOCK(dbenv, dbmp->reginfo);
00358
00359 if (restartp != NULL)
00360 *restartp = 1;
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371 if (mfp == NULL || F_ISSET(mfp, MP_DEADFILE))
00372 goto file_dead;
00373
00374
00375 if (LOGGING_ON(dbenv) || F_ISSET(bhp, BH_WRITE))
00376 memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN));
00377
00378
00379 if (LOGGING_ON(dbenv) && (ret = CDB_log_flush(dbenv, &lsn)) != 0)
00380 goto err;
00381
00382
00383
00384
00385
00386
00387 if (mfp->ftype == 0)
00388 ret = 0;
00389 else {
00390 callpgin = 1;
00391 if ((ret = CDB___memp_pg(dbmfp, bhp, 0)) != 0)
00392 goto err;
00393 }
00394
00395
00396 if (!F_ISSET(&dbmfp->fh, DB_FH_VALID)) {
00397 MUTEX_THREAD_LOCK(dbmp->mutexp);
00398 if (!F_ISSET(&dbmfp->fh, DB_FH_VALID) &&
00399 ((ret = CDB___db_appname(dbenv, DB_APP_TMP, NULL, NULL,
00400 DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_TEMP,
00401 &dbmfp->fh, NULL)) != 0 ||
00402 !F_ISSET(&dbmfp->fh, DB_FH_VALID))) {
00403 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00404 CDB___db_err(dbenv,
00405 "unable to create temporary backing file");
00406 goto err;
00407 }
00408 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00409 }
00410
00411
00412 db_io.fhp = &dbmfp->fh;
00413 db_io.mutexp = dbmfp->mutexp;
00414 db_io.pagesize = db_io.bytes = mfp->stat.st_pagesize;
00415 db_io.pgno = bhp->pgno;
00416 db_io.buf = bhp->buf;
00417 #ifdef DEBUG
00418 {
00419 PAGE* pp = (PAGE*)db_io.buf;
00420 word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_WRITE, 1);
00421 switch(TYPE(pp)) {
00422 case P_IBTREE:
00423 word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_IBTREE, 1);
00424 break;
00425 case P_LBTREE:
00426 word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_LBTREE, 1);
00427 break;
00428 default:
00429 word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_UNKNOWN, 1);
00430 break;
00431 }
00432 }
00433 #endif
00434 if(F_ISSET(dbmfp, MP_CMPR)) {
00435 ret = CDB___memp_cmpr(dbmfp, bhp, &db_io, DB_IO_WRITE, &nw);
00436 } else {
00437 if(db_io.pgno == PGNO_BASE_MD) {
00438
00439
00440
00441
00442
00443
00444
00445
00446 size_t required = db_io.pagesize;
00447 size_t orig_bytes = db_io.bytes;
00448 db_io.bytes = DBMETASIZE;
00449 for(db_io.pagesize = DBMETASIZE; db_io.pagesize < required; db_io.pagesize <<= 1) {
00450 ret = CDB___os_io(dbenv, &db_io, DB_IO_WRITE, &nw);
00451 if(ret != 0 || nw != DBMETASIZE)
00452 break;
00453 }
00454 db_io.bytes = orig_bytes;
00455 db_io.pagesize = required;
00456 if(ret == 0)
00457 ret = CDB___os_io(dbenv, &db_io, DB_IO_WRITE, &nw);
00458 } else {
00459 ret = CDB___os_io(dbenv, &db_io, DB_IO_WRITE, &nw);
00460 }
00461 }
00462 if (ret != 0) {
00463 CDB___db_panic(dbenv, ret);
00464 fail = "write";
00465 goto syserr;
00466 }
00467 if (nw != mfp->stat.st_pagesize) {
00468 ret = EIO;
00469 fail = "write";
00470 goto syserr;
00471 }
00472
00473 file_dead:
00474
00475
00476
00477
00478
00479
00480
00481 MUTEX_UNLOCK(&bhp->mutex);
00482 R_LOCK(dbenv, dbmp->reginfo);
00483
00484
00485
00486
00487
00488
00489
00490 if (callpgin)
00491 F_SET(bhp, BH_CALLPGIN);
00492 F_CLR(bhp, BH_DIRTY | BH_LOCKED);
00493
00494
00495
00496
00497
00498
00499
00500 dosync = 0;
00501 if (F_ISSET(bhp, BH_WRITE)) {
00502 F_CLR(bhp, BH_WRITE);
00503
00504 --mp->lsn_cnt;
00505 if (mfp != NULL)
00506 dosync = --mfp->lsn_cnt == 0 ? 1 : 0;
00507 }
00508
00509
00510 c_mp = BH_TO_CACHE(dbmp, bhp);
00511 ++c_mp->stat.st_page_clean;
00512 --c_mp->stat.st_page_dirty;
00513
00514
00515 if (mfp != NULL)
00516 ++mfp->stat.st_page_out;
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533 if (dosync) {
00534 R_UNLOCK(dbenv, dbmp->reginfo);
00535 syncfail = CDB___os_fsync(dbenv, &dbmfp->fh) != 0;
00536 R_LOCK(dbenv, dbmp->reginfo);
00537 if (syncfail)
00538 F_SET(mp, MP_LSN_RETRY);
00539 }
00540
00541 if (wrotep != NULL)
00542 *wrotep = 1;
00543
00544 return (0);
00545
00546 syserr: CDB___db_err(dbenv, "%s: %s failed for page %lu",
00547 CDB___memp_fn(dbmfp), fail, (u_long)bhp->pgno);
00548
00549 err:
00550 MUTEX_UNLOCK(&bhp->mutex);
00551 R_LOCK(dbenv, dbmp->reginfo);
00552
00553
00554
00555
00556
00557
00558
00559 if (callpgin)
00560 F_SET(bhp, BH_CALLPGIN);
00561 F_CLR(bhp, BH_LOCKED);
00562
00563 return (ret);
00564 }
00565
00566
00567
00568
00569
00570
00571
00572 int
00573 CDB___memp_pg(dbmfp, bhp, is_pgin)
00574 DB_MPOOLFILE *dbmfp;
00575 BH *bhp;
00576 int is_pgin;
00577 {
00578 DBT dbt, *dbtp;
00579 DB_MPOOL *dbmp;
00580 DB_MPREG *mpreg;
00581 MPOOLFILE *mfp;
00582 int ftype, ret;
00583
00584 dbmp = dbmfp->dbmp;
00585 mfp = dbmfp->mfp;
00586
00587 MUTEX_THREAD_LOCK(dbmp->mutexp);
00588
00589 ftype = mfp->ftype;
00590 for (mpreg = LIST_FIRST(&dbmp->dbregq);
00591 mpreg != NULL; mpreg = LIST_NEXT(mpreg, q)) {
00592 if (ftype != mpreg->ftype)
00593 continue;
00594 if (mfp->pgcookie_len == 0)
00595 dbtp = NULL;
00596 else {
00597 dbt.size = mfp->pgcookie_len;
00598 dbt.data = R_ADDR(dbmp->reginfo, mfp->pgcookie_off);
00599 dbtp = &dbt;
00600 }
00601 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00602
00603 if (is_pgin) {
00604 if (mpreg->pgin != NULL &&
00605 (ret = mpreg->pgin(dbmp->dbenv,
00606 bhp->pgno, bhp->buf, dbtp)) != 0)
00607 goto err;
00608 } else
00609 if (mpreg->pgout != NULL &&
00610 (ret = mpreg->pgout(dbmp->dbenv,
00611 bhp->pgno, bhp->buf, dbtp)) != 0)
00612 goto err;
00613 break;
00614 }
00615
00616 if (mpreg == NULL)
00617 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00618
00619 return (0);
00620
00621 err: MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00622 CDB___db_err(dbmp->dbenv, "%s: %s failed for page %lu",
00623 CDB___memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
00624 return (ret);
00625 }
00626
00627
00628
00629
00630
00631
00632
00633 void
00634 CDB___memp_bhfree(dbmp, bhp, free_mem)
00635 DB_MPOOL *dbmp;
00636 BH *bhp;
00637 int free_mem;
00638 {
00639 DB_HASHTAB *dbht;
00640 MPOOL *c_mp, *mp;
00641 MPOOLFILE *mfp;
00642 int n_bucket, n_cache;
00643
00644 mp = dbmp->reginfo[0].primary;
00645 c_mp = BH_TO_CACHE(dbmp, bhp);
00646 n_cache = NCACHE(mp, bhp->pgno);
00647 n_bucket = NBUCKET(c_mp, bhp->mf_offset, bhp->pgno);
00648 dbht = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
00649
00650
00651 SH_TAILQ_REMOVE(&dbht[n_bucket], bhp, hq, __bh);
00652
00653
00654 SH_TAILQ_REMOVE(&c_mp->bhq, bhp, q, __bh);
00655
00656
00657
00658
00659
00660 mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
00661 if (--mfp->ref_cnt == 0)
00662 CDB___memp_mf_discard(dbmp, mfp);
00663
00664
00665
00666
00667
00668 CDB___memp_cmpr_free_chain(dbmp, bhp);
00669 if (free_mem) {
00670 --c_mp->stat.st_page_clean;
00671 CDB___db_shalloc_free(dbmp->reginfo[n_cache].addr, bhp);
00672 }
00673 }
00674
00675
00676
00677
00678
00679 static int
00680 __memp_upgrade(dbmp, dbmfp, mfp)
00681 DB_MPOOL *dbmp;
00682 DB_MPOOLFILE *dbmfp;
00683 MPOOLFILE *mfp;
00684 {
00685 DB_FH fh;
00686 int ret;
00687 char *rpath;
00688
00689
00690
00691
00692
00693
00694
00695 if (F_ISSET(dbmfp, MP_UPGRADE))
00696 return (0);
00697
00698
00699 if (F_ISSET(dbmfp, MP_UPGRADE_FAIL))
00700 return (1);
00701
00702
00703
00704
00705
00706
00707 if ((ret = CDB___db_appname(dbmp->dbenv, DB_APP_DATA,
00708 NULL, R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) != 0)
00709 return (ret);
00710 if (CDB___os_open(dbmp->dbenv, rpath, 0, 0, &fh) != 0) {
00711 F_SET(dbmfp, MP_UPGRADE_FAIL);
00712 ret = 1;
00713 } else {
00714
00715 (void)CDB___os_closehandle(&dbmfp->fh);
00716 dbmfp->fh = fh;
00717 F_SET(dbmfp, MP_UPGRADE);
00718 ret = 0;
00719 }
00720 CDB___os_freestr(rpath);
00721 return (ret);
00722 }