From d6f8fa0131deb92973dbc5c571a1cb612b127ab5 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 3 Mar 2014 15:24:59 -0800 Subject: [PATCH] Implement CLOCK buffer pool page replacement method --- btree2u.c | 136 +++++++++++++++++++++++++++++----------------------- threads2h.c | 116 ++++++++++++++------------------------------ threads2i.c | 114 ++++++++++++++----------------------------- threads2j.c | 114 ++++++++++++++----------------------------- 4 files changed, 181 insertions(+), 299 deletions(-) diff --git a/btree2u.c b/btree2u.c index bd97dd2..2d2a3d0 100644 --- a/btree2u.c +++ b/btree2u.c @@ -43,6 +43,7 @@ REDISTRIBUTION OF THIS SOFTWARE. #include #include #include +#include #endif #include @@ -169,6 +170,7 @@ typedef struct { BtSpinLatch readwr[1]; // read/write page lock BtSpinLatch access[1]; // Access Intent/Page delete BtSpinLatch parent[1]; // Posting of fence key in parent + volatile ushort clock; // accessed since last clock pass volatile uint next; // next entry in hash table chain volatile uint prev; // prev entry in hash table chain volatile uint pin; // number of outstanding pins @@ -539,6 +541,7 @@ BtLatchSet *latch = bt->latchsets + slot; bt->table[hashidx].slot = slot; latch->page_no = page_no; + latch->clock = 1; latch->prev = 0; latch->pin = 1; @@ -568,7 +571,7 @@ off64_t off; uint amt[1]; BtPage page; - // try to find unpinned entry + // try to find our entry bt_spinwritelock(bt->table[hashidx].latch); @@ -579,33 +582,12 @@ BtPage page; break; } while( slot = latch->next ); - // found our entry, bring to front of hash chain + // found our entry if( slot ) { latch = bt->latchsets + slot; -#ifdef unix - __sync_fetch_and_add(&latch->pin, 1); -#else - _InterlockedIncrement (&latch->pin); -#endif - // unlink our entry from its hash chain position - - if( latch->prev ) - bt->latchsets[latch->prev].next = latch->next; - else - bt->table[hashidx].slot = latch->next; - - if( latch->next ) - bt->latchsets[latch->next].prev = latch->prev; - - // now link into head of the hash chain - - if( latch->next = bt->table[hashidx].slot ) - bt->latchsets[latch->next].prev = slot; - - bt->table[hashidx].slot = slot; - latch->prev = 0; - + latch->clock = 1; + latch->pin++; bt_spinreleasewrite(bt->table[hashidx].latch); return latch; } @@ -630,32 +612,31 @@ BtPage page; #else _InterlockedDecrement (&bt->latchmgr->latchdeployed); #endif - // find and reuse previous lru lock entry on victim hash chain + // find and reuse previous entry on victim while( 1 ) { #ifdef unix - idx = __sync_fetch_and_add(&bt->latchmgr->latchvictim, 1); + slot = __sync_fetch_and_add(&bt->latchmgr->latchvictim, 1); #else - idx = _InterlockedIncrement (&bt->latchmgr->latchvictim) - 1; + slot = _InterlockedIncrement (&bt->latchmgr->latchvictim) - 1; #endif // try to get write lock on hash chain // skip entry if not obtained - // or has outstanding locks + // or has outstanding pins + + slot %= bt->latchmgr->latchtotal; + latch = bt->latchsets + slot; - idx %= bt->latchmgr->latchhash; + idx = latch->page_no % bt->latchmgr->latchhash; - if( !bt_spinwritetry (bt->table[idx].latch) ) + if( !slot ) continue; - if( slot = bt->table[idx].slot ) - while( 1 ) { - latch = bt->latchsets + slot; - if( !latch->next ) - break; - slot = latch->next; - } + if( !bt_spinwritetry (bt->table[idx].latch) ) + continue; - if( !slot || latch->pin ) { + if( latch->clock ) { + latch->clock = 0; bt_spinreleasewrite (bt->table[idx].latch); continue; } @@ -663,7 +644,9 @@ BtPage page; // update permanent page area in btree page = (BtPage)((uid)slot * bt->page_size + bt->pagepool); - +#ifdef unix + posix_fadvise (bt->idx, page_no << bt->page_bits, bt->page_size, POSIX_FADV_WILLNEED); +#endif if( page->dirty ) if( bt_writepage (bt, page, latch->page_no) ) return NULL; @@ -750,7 +733,8 @@ struct flock lock[1]; bt = calloc (1, sizeof(BtDb)); bt->idx = open ((char*)name, O_RDWR | O_CREAT, 0666); - + posix_fadvise( bt->idx, 0, 0, POSIX_FADV_RANDOM); + if( bt->idx == -1 ) { fprintf(stderr, "unable to open %s\n", name); return free(bt), NULL; @@ -789,7 +773,7 @@ struct flock lock[1]; #endif #ifdef unix - latchmgr = malloc (BT_maxpage); + latchmgr = valloc (BT_maxpage); *amt = 0; // read minimum page size to get root info @@ -880,13 +864,12 @@ struct flock lock[1]; // clear out buffer pool pages memset(latchmgr, 0, bt->page_size); - last = MIN_lvl; + last = MIN_lvl + nlatchpage; - while( ++last < ((MIN_lvl + 1 + nlatchpage) ) ) - if( bt_writepage (bt, latchmgr->alloc, last) ) { + if( bt_writepage (bt, latchmgr->alloc, last) ) { fprintf (stderr, "Unable to write buffer pool page %.8x\n", last); return bt_close (bt), NULL; - } + } #ifdef unix free (latchmgr); @@ -919,6 +902,7 @@ btlatch: fprintf (stderr, "Unable to mmap buffer pool, errno = %d", errno); return bt_close (bt), NULL; } + madvise (bt->table, (uid)nlatchpage << bt->page_bits, MADV_RANDOM | MADV_WILLNEED); #else flag = PAGE_READWRITE; bt->halloc = CreateFileMapping(bt->idx, NULL, flag, 0, ((uid)nlatchpage + LATCH_page) * bt->page_size, NULL); @@ -940,7 +924,7 @@ btlatch: bt->latchsets = (BtLatchSet *)(bt->pagepool - (uid)bt->latchmgr->latchtotal * sizeof(BtLatchSet)); #ifdef unix - bt->mem = malloc (2 * bt->page_size); + bt->mem = valloc (2 * bt->page_size); #else bt->mem = VirtualAlloc(NULL, 2 * bt->page_size, MEM_COMMIT, PAGE_READWRITE); #endif @@ -1062,7 +1046,7 @@ int ans; void bt_update (BtDb *bt, BtPage page) { #ifdef unix - msync (page, bt->page_size, MS_ASYNC); +// msync (page, bt->page_size, MS_ASYNC); #else // FlushViewOfFile (page, bt->page_size); #endif @@ -1883,11 +1867,14 @@ uint bt_audit (BtDb *bt) uint idx, hashidx; uid next, page_no; BtLatchSet *latch; +uint blks[64]; uint cnt = 0; BtPage page; uint amt[1]; BtKey ptr; + memset (blks, 0, sizeof(blks)); + if( *(ushort *)(bt->latchmgr->lock) ) fprintf(stderr, "Alloc page locked\n"); *(ushort *)(bt->latchmgr->lock) = 0; @@ -1944,12 +1931,15 @@ BtKey ptr; } if( !bt->frame->lvl ) cnt += bt->frame->act; + blks[bt->frame->lvl]++; } if( page_no > LEAF_page ) next = page_no + 1; page_no = next; } + for( idx = 0; blks[idx]; idx++ ) + fprintf(stderr, "%d lvl %d blocks\n", blks[idx], idx); return cnt - 1; } @@ -2024,15 +2014,20 @@ int ch, cnt = 0, bits = 12, idx; unsigned char key[256]; double done, start; uid next, page_no; +BtLatchSet *latch; float elapsed; time_t tod[1]; uint scan = 0; uint len = 0; uint map = 0; +BtPage page; BtKey ptr; BtDb *bt; FILE *in; +#ifdef WIN32 + _setmode (1, _O_BINARY); +#endif if( argc < 4 ) { fprintf (stderr, "Usage: %s idx_file src_file Read/Write/Scan/Delete/Find/Count [page_bits mapped_pool_pages start_line_number]\n", argv[0]); fprintf (stderr, " page_bits: size of btree page in bits\n"); @@ -2061,15 +2056,27 @@ FILE *in; switch(argv[3][0]| 0x20) { - case 'a': - fprintf(stderr, "started audit for %s\n", argv[2]); + case 'p': // display page + if( latch = bt_pinlatch (bt, off) ) + page = bt_mappage (bt, latch); + else + fprintf(stderr, "unable to read page %.8x\n", off); + + write (1, page, bt->page_size); + break; + + case 'a': // buffer pool audit + fprintf(stderr, "started audit for %s\n", argv[1]); cnt = bt_audit (bt); - fprintf(stderr, "finished audit for %s, %d keys\n", argv[2], cnt); + fprintf(stderr, "finished audit for %s, %d keys\n", argv[1], cnt); break; - case 'w': + case 'w': // write keys fprintf(stderr, "started indexing for %s\n", argv[2]); - if( argc > 2 && (in = fopen (argv[2], "rb")) ) + if( argc > 2 && (in = fopen (argv[2], "rb")) ) { +#ifdef unix + posix_fadvise( fileno(in), 0, 0, POSIX_FADV_NOREUSE); +#endif while( ch = getc(in), ch != EOF ) if( ch == '\n' ) { @@ -2082,12 +2089,16 @@ FILE *in; } else if( len < 245 ) key[len++] = ch; + } fprintf(stderr, "finished adding keys for %s, %d \n", argv[2], line); break; - case 'd': + case 'd': // delete keys fprintf(stderr, "started deleting keys for %s\n", argv[2]); - if( argc > 2 && (in = fopen (argv[2], "rb")) ) + if( argc > 2 && (in = fopen (argv[2], "rb")) ) { +#ifdef unix + posix_fadvise( fileno(in), 0, 0, POSIX_FADV_NOREUSE); +#endif while( ch = getc(in), ch != EOF ) if( ch == '\n' ) { @@ -2100,12 +2111,16 @@ FILE *in; } else if( len < 245 ) key[len++] = ch; + } fprintf(stderr, "finished deleting keys for %s, %d \n", argv[2], line); break; - case 'f': + case 'f': // find keys fprintf(stderr, "started finding keys for %s\n", argv[2]); - if( argc > 2 && (in = fopen (argv[2], "rb")) ) + if( argc > 2 && (in = fopen (argv[2], "rb")) ) { +#ifdef unix + posix_fadvise( fileno(in), 0, 0, POSIX_FADV_NOREUSE); +#endif while( ch = getc(in), ch != EOF ) if( ch == '\n' ) { @@ -2120,10 +2135,11 @@ FILE *in; } else if( len < 245 ) key[len++] = ch; + } fprintf(stderr, "finished search of %d keys for %s, found %d\n", line, argv[2], found); break; - case 's': + case 's': // scan and print keys fprintf(stderr, "started scaning\n"); cnt = len = key[0] = 0; @@ -2142,7 +2158,7 @@ FILE *in; fprintf(stderr, " Total keys read %d\n", cnt - 1); break; - case 'c': + case 'c': // count keys fprintf(stderr, "started counting\n"); cnt = 0; @@ -2150,8 +2166,6 @@ FILE *in; page_no = LEAF_page; while( page_no < bt_getid(bt->latchmgr->alloc->right) ) { - BtLatchSet *latch; - BtPage page; if( latch = bt_pinlatch (bt, page_no) ) page = bt_mappage (bt, latch); if( !page->free && !page->lvl ) diff --git a/threads2h.c b/threads2h.c index 7602b11..500e8b3 100644 --- a/threads2h.c +++ b/threads2h.c @@ -179,7 +179,6 @@ typedef struct BtPage_ { // The memory mapping pool table buffer manager entry typedef struct { - unsigned long long int lru; // number of times accessed uid basepage; // mapped base page number char *map; // mapped memory pointer ushort slot; // slot index in this array @@ -191,6 +190,8 @@ typedef struct { #endif } BtPool; +#define CLOCK_bit 0x8000 // bit in pool->pin + // The loadpage interface object typedef struct { @@ -229,7 +230,7 @@ typedef struct { ushort poolmax; // highest page pool node allocated ushort poolmask; // total number of pages in mmap segment - 1 ushort hashsize; // size of Hash Table for pool entries - volatile uint evicted; // last evicted hash table slot + volatile uint evicted; // last evicted pool table slot ushort *hash; // pool index for hash entries BtSpinLatch *latch; // latches for hash table slots BtLatchMgr *latchmgr; // mapped latch page from allocation page @@ -1116,7 +1117,7 @@ uint slot; pool->hashprev = pool->hashnext = NULL; pool->basepage = page_no & ~bt->mgr->poolmask; - pool->lru = 1; + pool->pin = CLOCK_bit + 1; if( slot = bt->mgr->hash[idx] ) { node = bt->mgr->pool + slot; @@ -1127,32 +1128,6 @@ uint slot; bt->mgr->hash[idx] = pool->slot; } -// find best segment to evict from buffer pool - -BtPool *bt_findlru (BtDb *bt, uint hashslot) -{ -unsigned long long int target = ~0LL; -BtPool *pool = NULL, *node; - - if( !hashslot ) - return NULL; - - node = bt->mgr->pool + hashslot; - - // scan pool entries under hash table slot - - do { - if( node->pin ) - continue; - if( node->lru > target ) - continue; - target = node->lru; - pool = node; - } while( node = node->hashnext ); - - return pool; -} - // map new buffer pool segment to virtual memory BTERR bt_mapsegment(BtDb *bt, BtPool *pool, uid page_no) @@ -1207,42 +1182,25 @@ void bt_unpinpool (BtPool *pool) BtPool *bt_pinpool(BtDb *bt, uid page_no) { +uint slot, hashidx, idx, victim; BtPool *pool, *node, *next; -uint slot, idx, victim; // lock hash table chain - idx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize; - bt_spinreadlock (&bt->mgr->latch[idx]); + hashidx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize; + bt_spinwritelock (&bt->mgr->latch[hashidx]); // look up in hash table - if( pool = bt_findpool(bt, page_no, idx) ) { -#ifdef unix - __sync_fetch_and_add(&pool->pin, 1); -#else - _InterlockedIncrement16 (&pool->pin); -#endif - bt_spinreleaseread (&bt->mgr->latch[idx]); - pool->lru++; - return pool; - } - - // upgrade to write lock - - bt_spinreleaseread (&bt->mgr->latch[idx]); - bt_spinwritelock (&bt->mgr->latch[idx]); - - // try to find page in pool with write lock - - if( pool = bt_findpool(bt, page_no, idx) ) { + if( pool = bt_findpool(bt, page_no, hashidx) ) { #ifdef unix + __sync_fetch_and_or(&pool->pin, CLOCK_bit); __sync_fetch_and_add(&pool->pin, 1); #else + _InterlockedOr16 (&pool->pin, CLOCK_bit); _InterlockedIncrement16 (&pool->pin); #endif - bt_spinreleasewrite (&bt->mgr->latch[idx]); - pool->lru++; + bt_spinreleasewrite (&bt->mgr->latch[hashidx]); return pool; } @@ -1262,13 +1220,8 @@ uint slot, idx, victim; if( bt_mapsegment(bt, pool, page_no) ) return NULL; - bt_linkhash(bt, pool, page_no, idx); -#ifdef unix - __sync_fetch_and_add(&pool->pin, 1); -#else - _InterlockedIncrement16 (&pool->pin); -#endif - bt_spinreleasewrite (&bt->mgr->latch[idx]); + bt_linkhash(bt, pool, page_no, hashidx); + bt_spinreleasewrite (&bt->mgr->latch[hashidx]); return pool; } @@ -1287,20 +1240,30 @@ uint slot, idx, victim; #else victim = _InterlockedIncrement (&bt->mgr->evicted) - 1; #endif - victim %= bt->mgr->hashsize; + victim %= bt->mgr->poolmax; + pool = bt->mgr->pool + victim; + idx = (uint)(pool->basepage >> bt->mgr->seg_bits) % bt->mgr->hashsize; + + if( !victim ) + continue; // try to get write lock // skip entry if not obtained - if( !bt_spinwritetry (&bt->mgr->latch[victim]) ) + if( !bt_spinwritetry (&bt->mgr->latch[idx]) ) continue; - // if pool entry is empty - // or any pages are pinned - // skip this entry + // skip this entry if + // page is pinned + // or clock bit is set - if( !(pool = bt_findlru(bt, bt->mgr->hash[victim])) ) { - bt_spinreleasewrite (&bt->mgr->latch[victim]); + if( pool->pin ) { +#ifdef unix + __sync_fetch_and_and(&pool->pin, ~CLOCK_bit); +#else + _InterlockedAnd16 (&pool->pin, ~CLOCK_bit); +#endif + bt_spinreleasewrite (&bt->mgr->latch[idx]); continue; } @@ -1309,14 +1272,14 @@ uint slot, idx, victim; if( node = pool->hashprev ) node->hashnext = pool->hashnext; else if( node = pool->hashnext ) - bt->mgr->hash[victim] = node->slot; + bt->mgr->hash[idx] = node->slot; else - bt->mgr->hash[victim] = 0; + bt->mgr->hash[idx] = 0; if( node = pool->hashnext ) node->hashprev = pool->hashprev; - bt_spinreleasewrite (&bt->mgr->latch[victim]); + bt_spinreleasewrite (&bt->mgr->latch[idx]); // remove old file mapping #ifdef unix @@ -1334,13 +1297,8 @@ uint slot, idx, victim; if( bt_mapsegment(bt, pool, page_no) ) return NULL; - bt_linkhash(bt, pool, page_no, idx); -#ifdef unix - __sync_fetch_and_add(&pool->pin, 1); -#else - _InterlockedIncrement16 (&pool->pin); -#endif - bt_spinreleasewrite (&bt->mgr->latch[idx]); + bt_linkhash(bt, pool, page_no, hashidx); + bt_spinreleasewrite (&bt->mgr->latch[hashidx]); return pool; } } @@ -2296,7 +2254,7 @@ BtPageSet set[1]; BtKey ptr; #ifdef unix - for( idx = 1; idx < bt->mgr->latchmgr->latchdeployed; idx++ ) { + for( idx = 1; idx <= bt->mgr->latchmgr->latchdeployed; idx++ ) { set->latch = bt->mgr->latchsets + idx; if( set->latch->pin ) { fprintf(stderr, "latchset %d pinned for page %.6x\n", idx, set->latch->page_no); @@ -2433,8 +2391,6 @@ FILE *in; found++; else if( bt->err ) fprintf(stderr, "Error %d Syserr %d Line: %d\n", bt->err, errno, line), exit(0); - else - fprintf(stderr, "Unable to find key %.*s line %d\n", len, key, line); len = 0; } else if( len < 255 ) diff --git a/threads2i.c b/threads2i.c index b67940e..f1d0269 100644 --- a/threads2i.c +++ b/threads2i.c @@ -171,7 +171,6 @@ typedef struct BtPage_ { // The memory mapping pool table buffer manager entry typedef struct { - unsigned long long int lru; // number of times accessed uid basepage; // mapped base page number char *map; // mapped memory pointer ushort slot; // slot index in this array @@ -183,6 +182,8 @@ typedef struct { #endif } BtPool; +#define CLOCK_bit 0x8000 // bit in pool->pin + // The loadpage interface object typedef struct { @@ -1027,7 +1028,7 @@ uint slot; pool->hashprev = pool->hashnext = NULL; pool->basepage = page_no & ~bt->mgr->poolmask; - pool->lru = 1; + pool->pin = CLOCK_bit + 1; if( slot = bt->mgr->hash[idx] ) { node = bt->mgr->pool + slot; @@ -1038,32 +1039,6 @@ uint slot; bt->mgr->hash[idx] = pool->slot; } -// find best segment to evict from buffer pool - -BtPool *bt_findlru (BtDb *bt, uint hashslot) -{ -unsigned long long int target = ~0LL; -BtPool *pool = NULL, *node; - - if( !hashslot ) - return NULL; - - node = bt->mgr->pool + hashslot; - - // scan pool entries under hash table slot - - do { - if( node->pin ) - continue; - if( node->lru > target ) - continue; - target = node->lru; - pool = node; - } while( node = node->hashnext ); - - return pool; -} - // map new buffer pool segment to virtual memory BTERR bt_mapsegment(BtDb *bt, BtPool *pool, uid page_no) @@ -1119,42 +1094,25 @@ void bt_unpinpool (BtPool *pool) BtPool *bt_pinpool(BtDb *bt, uid page_no) { +uint slot, hashidx, idx, victim; BtPool *pool, *node, *next; -uint slot, idx, victim; // lock hash table chain - idx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize; - bt_spinreadlock (&bt->mgr->latch[idx]); + hashidx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize; + bt_spinwritelock (&bt->mgr->latch[hashidx]); // look up in hash table - if( pool = bt_findpool(bt, page_no, idx) ) { -#ifdef unix - __sync_fetch_and_add(&pool->pin, 1); -#else - _InterlockedIncrement16 (&pool->pin); -#endif - bt_spinreleaseread (&bt->mgr->latch[idx]); - pool->lru++; - return pool; - } - - // upgrade to write lock - - bt_spinreleaseread (&bt->mgr->latch[idx]); - bt_spinwritelock (&bt->mgr->latch[idx]); - - // try to find page in pool with write lock - - if( pool = bt_findpool(bt, page_no, idx) ) { + if( pool = bt_findpool(bt, page_no, hashidx) ) { #ifdef unix + __sync_fetch_and_or(&pool->pin, CLOCK_bit); __sync_fetch_and_add(&pool->pin, 1); #else + _InterlockedOr16 (&pool->pin, CLOCK_bit); _InterlockedIncrement16 (&pool->pin); #endif - bt_spinreleasewrite (&bt->mgr->latch[idx]); - pool->lru++; + bt_spinreleasewrite (&bt->mgr->latch[hashidx]); return pool; } @@ -1174,13 +1132,8 @@ uint slot, idx, victim; if( bt_mapsegment(bt, pool, page_no) ) return NULL; - bt_linkhash(bt, pool, page_no, idx); -#ifdef unix - __sync_fetch_and_add(&pool->pin, 1); -#else - _InterlockedIncrement16 (&pool->pin); -#endif - bt_spinreleasewrite (&bt->mgr->latch[idx]); + bt_linkhash(bt, pool, page_no, hashidx); + bt_spinreleasewrite (&bt->mgr->latch[hashidx]); return pool; } @@ -1199,20 +1152,30 @@ uint slot, idx, victim; #else victim = _InterlockedIncrement (&bt->mgr->evicted) - 1; #endif - victim %= bt->mgr->hashsize; + victim %= bt->mgr->poolmax; + pool = bt->mgr->pool + victim; + idx = (uint)(pool->basepage >> bt->mgr->seg_bits) % bt->mgr->hashsize; + + if( !victim ) + continue; // try to get write lock // skip entry if not obtained - if( !bt_spinwritetry (&bt->mgr->latch[victim]) ) + if( !bt_spinwritetry (&bt->mgr->latch[idx]) ) continue; - // if pool entry is empty - // or any pages are pinned - // skip this entry + // skip this entry if + // page is pinned + // or clock bit is set - if( !(pool = bt_findlru(bt, bt->mgr->hash[victim])) ) { - bt_spinreleasewrite (&bt->mgr->latch[victim]); + if( pool->pin ) { +#ifdef unix + __sync_fetch_and_and(&pool->pin, ~CLOCK_bit); +#else + _InterlockedAnd16 (&pool->pin, ~CLOCK_bit); +#endif + bt_spinreleasewrite (&bt->mgr->latch[idx]); continue; } @@ -1221,14 +1184,14 @@ uint slot, idx, victim; if( node = pool->hashprev ) node->hashnext = pool->hashnext; else if( node = pool->hashnext ) - bt->mgr->hash[victim] = node->slot; + bt->mgr->hash[idx] = node->slot; else - bt->mgr->hash[victim] = 0; + bt->mgr->hash[idx] = 0; if( node = pool->hashnext ) node->hashprev = pool->hashprev; - bt_spinreleasewrite (&bt->mgr->latch[victim]); + bt_spinreleasewrite (&bt->mgr->latch[idx]); // remove old file mapping #ifdef unix @@ -1246,13 +1209,8 @@ uint slot, idx, victim; if( bt_mapsegment(bt, pool, page_no) ) return NULL; - bt_linkhash(bt, pool, page_no, idx); -#ifdef unix - __sync_fetch_and_add(&pool->pin, 1); -#else - _InterlockedIncrement16 (&pool->pin); -#endif - bt_spinreleasewrite (&bt->mgr->latch[idx]); + bt_linkhash(bt, pool, page_no, hashidx); + bt_spinreleasewrite (&bt->mgr->latch[hashidx]); return pool; } } @@ -2210,7 +2168,7 @@ BtKey ptr; fprintf(stderr, "Alloc page locked\n"); *(uint *)(bt->mgr->latchmgr->lock) = 0; - for( idx = 1; idx < bt->mgr->latchmgr->latchdeployed; idx++ ) { + for( idx = 1; idx <= bt->mgr->latchmgr->latchdeployed; idx++ ) { latch = bt->mgr->latchsets + idx; if( *(uint *)latch->readwr ) fprintf(stderr, "latchset %d rwlocked for page %.8x\n", idx, latch->page_no); @@ -2367,8 +2325,6 @@ FILE *in; found++; else if( bt->err ) fprintf(stderr, "Error %d Syserr %d Line: %d\n", bt->err, errno, line), exit(0); - else - fprintf(stderr, "Unable to find key %.*s line %d\n", len, key, line); len = 0; } else if( len < 255 ) diff --git a/threads2j.c b/threads2j.c index ca15618..89ea6d8 100644 --- a/threads2j.c +++ b/threads2j.c @@ -183,7 +183,6 @@ typedef struct { // The memory mapping pool table buffer manager entry typedef struct { - unsigned long long int lru; // number of times accessed uid basepage; // mapped base page number char *map; // mapped memory pointer ushort slot; // slot index in this array @@ -195,6 +194,8 @@ typedef struct { #endif } BtPool; +#define CLOCK_bit 0x8000 // bit in pool->pin + // The loadpage interface object typedef struct { @@ -1070,7 +1071,7 @@ uint slot; pool->hashprev = pool->hashnext = NULL; pool->basepage = page_no & ~bt->mgr->poolmask; - pool->lru = 1; + pool->pin = CLOCK_bit + 1; if( slot = bt->mgr->hash[idx] ) { node = bt->mgr->pool + slot; @@ -1081,32 +1082,6 @@ uint slot; bt->mgr->hash[idx] = pool->slot; } -// find best segment to evict from buffer pool - -BtPool *bt_findlru (BtDb *bt, uint hashslot) -{ -unsigned long long int target = ~0LL; -BtPool *pool = NULL, *node; - - if( !hashslot ) - return NULL; - - node = bt->mgr->pool + hashslot; - - // scan pool entries under hash table slot - - do { - if( node->pin ) - continue; - if( node->lru > target ) - continue; - target = node->lru; - pool = node; - } while( node = node->hashnext ); - - return pool; -} - // map new buffer pool segment to virtual memory BTERR bt_mapsegment(BtDb *bt, BtPool *pool, uid page_no) @@ -1162,42 +1137,25 @@ void bt_unpinpool (BtPool *pool) BtPool *bt_pinpool(BtDb *bt, uid page_no) { +uint slot, hashidx, idx, victim; BtPool *pool, *node, *next; -uint slot, idx, victim; // lock hash table chain - idx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize; - bt_spinreadlock (&bt->mgr->latch[idx], 1); + hashidx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize; + bt_spinreadlock (&bt->mgr->latch[hashidx], 1); // look up in hash table - if( pool = bt_findpool(bt, page_no, idx) ) { -#ifdef unix - __sync_fetch_and_add(&pool->pin, 1); -#else - _InterlockedIncrement16 (&pool->pin); -#endif - bt_spinreleaseread (&bt->mgr->latch[idx], 1); - pool->lru++; - return pool; - } - - // upgrade to write lock - - bt_spinreleaseread (&bt->mgr->latch[idx], 1); - bt_spinwritelock (&bt->mgr->latch[idx], 1); - - // try to find page in pool with write lock - - if( pool = bt_findpool(bt, page_no, idx) ) { + if( pool = bt_findpool(bt, page_no, hashidx) ) { #ifdef unix + __sync_fetch_and_or(&pool->pin, CLOCK_bit); __sync_fetch_and_add(&pool->pin, 1); #else + _InterlockedOr16 (&pool->pin, CLOCK_bit); _InterlockedIncrement16 (&pool->pin); #endif - bt_spinreleasewrite (&bt->mgr->latch[idx], 1); - pool->lru++; + bt_spinreleaseread (&bt->mgr->latch[hashidx], 1); return pool; } @@ -1217,13 +1175,8 @@ uint slot, idx, victim; if( bt_mapsegment(bt, pool, page_no) ) return NULL; - bt_linkhash(bt, pool, page_no, idx); -#ifdef unix - __sync_fetch_and_add(&pool->pin, 1); -#else - _InterlockedIncrement16 (&pool->pin); -#endif - bt_spinreleasewrite (&bt->mgr->latch[idx], 1); + bt_linkhash(bt, pool, page_no, hashidx); + bt_spinreleasewrite (&bt->mgr->latch[hashidx], 1); return pool; } @@ -1242,20 +1195,30 @@ uint slot, idx, victim; #else victim = _InterlockedIncrement16 (&bt->mgr->evicted) - 1; #endif - victim %= bt->mgr->hashsize; + victim %= bt->mgr->poolmax; + pool = bt->mgr->pool + victim; + idx = (uint)(pool->basepage >> bt->mgr->seg_bits) % bt->mgr->hashsize; + + if( !victim ) + continue; // try to get write lock // skip entry if not obtained - if( !bt_spinwritetry (&bt->mgr->latch[victim]) ) + if( !bt_spinwritetry (&bt->mgr->latch[idx]) ) continue; - // if pool entry is empty - // or any pages are pinned - // skip this entry + // skip this entry if + // page is pinned + // or clock bit is set - if( !(pool = bt_findlru(bt, bt->mgr->hash[victim])) ) { - bt_spinreleasewrite (&bt->mgr->latch[victim], 1); + if( pool->pin ) { +#ifdef unix + __sync_fetch_and_and(&pool->pin, ~CLOCK_bit); +#else + _InterlockedAnd16 (&pool->pin, ~CLOCK_bit); +#endif + bt_spinreleasewrite (&bt->mgr->latch[idx], 1); continue; } @@ -1264,14 +1227,14 @@ uint slot, idx, victim; if( node = pool->hashprev ) node->hashnext = pool->hashnext; else if( node = pool->hashnext ) - bt->mgr->hash[victim] = node->slot; + bt->mgr->hash[idx] = node->slot; else - bt->mgr->hash[victim] = 0; + bt->mgr->hash[idx] = 0; if( node = pool->hashnext ) node->hashprev = pool->hashprev; - bt_spinreleasewrite (&bt->mgr->latch[victim], 1); + bt_spinreleasewrite (&bt->mgr->latch[idx], 1); // remove old file mapping #ifdef unix @@ -1289,13 +1252,8 @@ uint slot, idx, victim; if( bt_mapsegment(bt, pool, page_no) ) return NULL; - bt_linkhash(bt, pool, page_no, idx); -#ifdef unix - __sync_fetch_and_add(&pool->pin, 1); -#else - _InterlockedIncrement16 (&pool->pin); -#endif - bt_spinreleasewrite (&bt->mgr->latch[idx], 1); + bt_linkhash(bt, pool, page_no, hashidx); + bt_spinreleasewrite (&bt->mgr->latch[hashidx], 1); return pool; } } @@ -2258,7 +2216,7 @@ BtKey ptr; fprintf(stderr, "Alloc page locked\n"); *(uint *)(bt->mgr->latchmgr->lock) = 0; - for( idx = 1; idx < bt->mgr->latchmgr->latchdeployed; idx++ ) { + for( idx = 1; idx <= bt->mgr->latchmgr->latchdeployed; idx++ ) { latch = bt->mgr->latchsets + idx; if( *(uint *)latch->readwr ) fprintf(stderr, "latchset %d rwlocked for page %.8x\n", idx, latch->page_no); @@ -2415,8 +2373,6 @@ FILE *in; found++; else if( bt->err ) fprintf(stderr, "Error %d Syserr %d Line: %d\n", bt->err, errno, line), exit(0); - else - fprintf(stderr, "Unable to find key %.*s line %d\n", len, key, line); len = 0; } else if( len < 255 ) -- 2.40.0