#include <stdlib.h>
#include <time.h>
#include <fcntl.h>
+#include <io.h>
#endif
#include <memory.h>
BtSpinLatch readwr[1]; // read/write page lock
BtSpinLatch access[1]; // Access Intent/Page delete
BtSpinLatch parent[1]; // Posting of fence key in parent
+ volatile ushort clock; // accessed since last clock pass
volatile uint next; // next entry in hash table chain
volatile uint prev; // prev entry in hash table chain
volatile uint pin; // number of outstanding pins
bt->table[hashidx].slot = slot;
latch->page_no = page_no;
+ latch->clock = 1;
latch->prev = 0;
latch->pin = 1;
uint amt[1];
BtPage page;
- // try to find unpinned entry
+ // try to find our entry
bt_spinwritelock(bt->table[hashidx].latch);
break;
} while( slot = latch->next );
- // found our entry, bring to front of hash chain
+ // found our entry
if( slot ) {
latch = bt->latchsets + slot;
-#ifdef unix
- __sync_fetch_and_add(&latch->pin, 1);
-#else
- _InterlockedIncrement (&latch->pin);
-#endif
- // unlink our entry from its hash chain position
-
- if( latch->prev )
- bt->latchsets[latch->prev].next = latch->next;
- else
- bt->table[hashidx].slot = latch->next;
-
- if( latch->next )
- bt->latchsets[latch->next].prev = latch->prev;
-
- // now link into head of the hash chain
-
- if( latch->next = bt->table[hashidx].slot )
- bt->latchsets[latch->next].prev = slot;
-
- bt->table[hashidx].slot = slot;
- latch->prev = 0;
-
+ latch->clock = 1;
+ latch->pin++;
bt_spinreleasewrite(bt->table[hashidx].latch);
return latch;
}
#else
_InterlockedDecrement (&bt->latchmgr->latchdeployed);
#endif
- // find and reuse previous lru lock entry on victim hash chain
+ // find and reuse previous entry on victim
while( 1 ) {
#ifdef unix
- idx = __sync_fetch_and_add(&bt->latchmgr->latchvictim, 1);
+ slot = __sync_fetch_and_add(&bt->latchmgr->latchvictim, 1);
#else
- idx = _InterlockedIncrement (&bt->latchmgr->latchvictim) - 1;
+ slot = _InterlockedIncrement (&bt->latchmgr->latchvictim) - 1;
#endif
// try to get write lock on hash chain
// skip entry if not obtained
- // or has outstanding locks
+ // or has outstanding pins
+
+ slot %= bt->latchmgr->latchtotal;
+ latch = bt->latchsets + slot;
- idx %= bt->latchmgr->latchhash;
+ idx = latch->page_no % bt->latchmgr->latchhash;
- if( !bt_spinwritetry (bt->table[idx].latch) )
+ if( !slot )
continue;
- if( slot = bt->table[idx].slot )
- while( 1 ) {
- latch = bt->latchsets + slot;
- if( !latch->next )
- break;
- slot = latch->next;
- }
+ if( !bt_spinwritetry (bt->table[idx].latch) )
+ continue;
- if( !slot || latch->pin ) {
+ // skip entries that still have outstanding pins or that were
+ // accessed since the last clock pass; a skipped entry loses its
+ // clock mark so it can be reclaimed on a later sweep once unpinned
+ if( latch->pin || latch->clock ) {
+ latch->clock = 0;
bt_spinreleasewrite (bt->table[idx].latch);
continue;
}
// update permanent page area in btree
page = (BtPage)((uid)slot * bt->page_size + bt->pagepool);
-
+#ifdef unix
+ posix_fadvise (bt->idx, page_no << bt->page_bits, bt->page_size, POSIX_FADV_WILLNEED);
+#endif
if( page->dirty )
if( bt_writepage (bt, page, latch->page_no) )
return NULL;
bt = calloc (1, sizeof(BtDb));
bt->idx = open ((char*)name, O_RDWR | O_CREAT, 0666);
-
+ posix_fadvise( bt->idx, 0, 0, POSIX_FADV_RANDOM);
+
if( bt->idx == -1 ) {
fprintf(stderr, "unable to open %s\n", name);
return free(bt), NULL;
#endif
#ifdef unix
- latchmgr = malloc (BT_maxpage);
+ latchmgr = valloc (BT_maxpage);
*amt = 0;
// read minimum page size to get root info
// clear out buffer pool pages
memset(latchmgr, 0, bt->page_size);
- last = MIN_lvl;
+ last = MIN_lvl + nlatchpage;
- while( ++last < ((MIN_lvl + 1 + nlatchpage) ) )
- if( bt_writepage (bt, latchmgr->alloc, last) ) {
+ if( bt_writepage (bt, latchmgr->alloc, last) ) {
fprintf (stderr, "Unable to write buffer pool page %.8x\n", last);
return bt_close (bt), NULL;
- }
+ }
#ifdef unix
free (latchmgr);
fprintf (stderr, "Unable to mmap buffer pool, errno = %d", errno);
return bt_close (bt), NULL;
}
+ // madvise takes a single advice value, not a bit mask, so each
+ // hint is issued with its own call
+ madvise (bt->table, (uid)nlatchpage << bt->page_bits, MADV_RANDOM);
+ madvise (bt->table, (uid)nlatchpage << bt->page_bits, MADV_WILLNEED);
#else
flag = PAGE_READWRITE;
bt->halloc = CreateFileMapping(bt->idx, NULL, flag, 0, ((uid)nlatchpage + LATCH_page) * bt->page_size, NULL);
bt->latchsets = (BtLatchSet *)(bt->pagepool - (uid)bt->latchmgr->latchtotal * sizeof(BtLatchSet));
#ifdef unix
- bt->mem = malloc (2 * bt->page_size);
+ bt->mem = valloc (2 * bt->page_size);
#else
bt->mem = VirtualAlloc(NULL, 2 * bt->page_size, MEM_COMMIT, PAGE_READWRITE);
#endif
void bt_update (BtDb *bt, BtPage page)
{
#ifdef unix
- msync (page, bt->page_size, MS_ASYNC);
+// msync (page, bt->page_size, MS_ASYNC);
#else
// FlushViewOfFile (page, bt->page_size);
#endif
uint idx, hashidx;
uid next, page_no;
BtLatchSet *latch;
+uint blks[64];
uint cnt = 0;
BtPage page;
uint amt[1];
BtKey ptr;
+ memset (blks, 0, sizeof(blks));
+
if( *(ushort *)(bt->latchmgr->lock) )
fprintf(stderr, "Alloc page locked\n");
*(ushort *)(bt->latchmgr->lock) = 0;
}
if( !bt->frame->lvl )
cnt += bt->frame->act;
+ blks[bt->frame->lvl]++;
}
if( page_no > LEAF_page )
next = page_no + 1;
page_no = next;
}
+ for( idx = 0; blks[idx]; idx++ )
+ fprintf(stderr, "%d lvl %d blocks\n", blks[idx], idx);
return cnt - 1;
}
unsigned char key[256];
double done, start;
uid next, page_no;
+BtLatchSet *latch;
float elapsed;
time_t tod[1];
uint scan = 0;
uint len = 0;
uint map = 0;
+BtPage page;
BtKey ptr;
BtDb *bt;
FILE *in;
+#ifdef WIN32
+ _setmode (1, _O_BINARY);
+#endif
if( argc < 4 ) {
fprintf (stderr, "Usage: %s idx_file src_file Read/Write/Scan/Delete/Find/Count [page_bits mapped_pool_pages start_line_number]\n", argv[0]);
fprintf (stderr, " page_bits: size of btree page in bits\n");
switch(argv[3][0]| 0x20)
{
- case 'a':
- fprintf(stderr, "started audit for %s\n", argv[2]);
+ case 'p': // display page: write the raw page image to stdout (fd 1)
+ if( latch = bt_pinlatch (bt, off) )
+ page = bt_mappage (bt, latch);
+ else {
+ // page was never assigned when the latch could not be
+ // pinned, so report the failure and skip the dump
+ fprintf(stderr, "unable to read page %.8x\n", off);
+ break;
+ }
+
+ write (1, page, bt->page_size);
+ break;
+
+ case 'a': // buffer pool audit
+ fprintf(stderr, "started audit for %s\n", argv[1]);
cnt = bt_audit (bt);
- fprintf(stderr, "finished audit for %s, %d keys\n", argv[2], cnt);
+ fprintf(stderr, "finished audit for %s, %d keys\n", argv[1], cnt);
break;
- case 'w':
+ case 'w': // write keys
fprintf(stderr, "started indexing for %s\n", argv[2]);
- if( argc > 2 && (in = fopen (argv[2], "rb")) )
+ if( argc > 2 && (in = fopen (argv[2], "rb")) ) {
+#ifdef unix
+ posix_fadvise( fileno(in), 0, 0, POSIX_FADV_NOREUSE);
+#endif
while( ch = getc(in), ch != EOF )
if( ch == '\n' )
{
}
else if( len < 245 )
key[len++] = ch;
+ }
fprintf(stderr, "finished adding keys for %s, %d \n", argv[2], line);
break;
- case 'd':
+ case 'd': // delete keys
fprintf(stderr, "started deleting keys for %s\n", argv[2]);
- if( argc > 2 && (in = fopen (argv[2], "rb")) )
+ if( argc > 2 && (in = fopen (argv[2], "rb")) ) {
+#ifdef unix
+ posix_fadvise( fileno(in), 0, 0, POSIX_FADV_NOREUSE);
+#endif
while( ch = getc(in), ch != EOF )
if( ch == '\n' )
{
}
else if( len < 245 )
key[len++] = ch;
+ }
fprintf(stderr, "finished deleting keys for %s, %d \n", argv[2], line);
break;
- case 'f':
+ case 'f': // find keys
fprintf(stderr, "started finding keys for %s\n", argv[2]);
- if( argc > 2 && (in = fopen (argv[2], "rb")) )
+ if( argc > 2 && (in = fopen (argv[2], "rb")) ) {
+#ifdef unix
+ posix_fadvise( fileno(in), 0, 0, POSIX_FADV_NOREUSE);
+#endif
while( ch = getc(in), ch != EOF )
if( ch == '\n' )
{
}
else if( len < 245 )
key[len++] = ch;
+ }
fprintf(stderr, "finished search of %d keys for %s, found %d\n", line, argv[2], found);
break;
- case 's':
+ case 's': // scan and print keys
fprintf(stderr, "started scaning\n");
cnt = len = key[0] = 0;
fprintf(stderr, " Total keys read %d\n", cnt - 1);
break;
- case 'c':
+ case 'c': // count keys
fprintf(stderr, "started counting\n");
cnt = 0;
page_no = LEAF_page;
while( page_no < bt_getid(bt->latchmgr->alloc->right) ) {
- BtLatchSet *latch;
- BtPage page;
if( latch = bt_pinlatch (bt, page_no) )
page = bt_mappage (bt, latch);
if( !page->free && !page->lvl )
// The memory mapping pool table buffer manager entry
typedef struct {
- unsigned long long int lru; // number of times accessed
uid basepage; // mapped base page number
char *map; // mapped memory pointer
ushort slot; // slot index in this array
#endif
} BtPool;
+#define CLOCK_bit 0x8000 // bit in pool->pin
+
// The loadpage interface object
typedef struct {
ushort poolmax; // highest page pool node allocated
ushort poolmask; // total number of pages in mmap segment - 1
ushort hashsize; // size of Hash Table for pool entries
- volatile uint evicted; // last evicted hash table slot
+ volatile uint evicted; // last evicted pool table slot
ushort *hash; // pool index for hash entries
BtSpinLatch *latch; // latches for hash table slots
BtLatchMgr *latchmgr; // mapped latch page from allocation page
pool->hashprev = pool->hashnext = NULL;
pool->basepage = page_no & ~bt->mgr->poolmask;
- pool->lru = 1;
+ pool->pin = CLOCK_bit + 1;
if( slot = bt->mgr->hash[idx] ) {
node = bt->mgr->pool + slot;
bt->mgr->hash[idx] = pool->slot;
}
-// find best segment to evict from buffer pool
-
-BtPool *bt_findlru (BtDb *bt, uint hashslot)
-{
-unsigned long long int target = ~0LL;
-BtPool *pool = NULL, *node;
-
- if( !hashslot )
- return NULL;
-
- node = bt->mgr->pool + hashslot;
-
- // scan pool entries under hash table slot
-
- do {
- if( node->pin )
- continue;
- if( node->lru > target )
- continue;
- target = node->lru;
- pool = node;
- } while( node = node->hashnext );
-
- return pool;
-}
-
// map new buffer pool segment to virtual memory
BTERR bt_mapsegment(BtDb *bt, BtPool *pool, uid page_no)
BtPool *bt_pinpool(BtDb *bt, uid page_no)
{
+uint slot, hashidx, idx, victim;
BtPool *pool, *node, *next;
-uint slot, idx, victim;
// lock hash table chain
- idx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize;
- bt_spinreadlock (&bt->mgr->latch[idx]);
+ hashidx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize;
+ bt_spinwritelock (&bt->mgr->latch[hashidx]);
// look up in hash table
- if( pool = bt_findpool(bt, page_no, idx) ) {
-#ifdef unix
- __sync_fetch_and_add(&pool->pin, 1);
-#else
- _InterlockedIncrement16 (&pool->pin);
-#endif
- bt_spinreleaseread (&bt->mgr->latch[idx]);
- pool->lru++;
- return pool;
- }
-
- // upgrade to write lock
-
- bt_spinreleaseread (&bt->mgr->latch[idx]);
- bt_spinwritelock (&bt->mgr->latch[idx]);
-
- // try to find page in pool with write lock
-
- if( pool = bt_findpool(bt, page_no, idx) ) {
+ if( pool = bt_findpool(bt, page_no, hashidx) ) {
#ifdef unix
+ __sync_fetch_and_or(&pool->pin, CLOCK_bit);
__sync_fetch_and_add(&pool->pin, 1);
#else
+ _InterlockedOr16 (&pool->pin, CLOCK_bit);
_InterlockedIncrement16 (&pool->pin);
#endif
- bt_spinreleasewrite (&bt->mgr->latch[idx]);
- pool->lru++;
+ bt_spinreleasewrite (&bt->mgr->latch[hashidx]);
return pool;
}
if( bt_mapsegment(bt, pool, page_no) )
return NULL;
- bt_linkhash(bt, pool, page_no, idx);
-#ifdef unix
- __sync_fetch_and_add(&pool->pin, 1);
-#else
- _InterlockedIncrement16 (&pool->pin);
-#endif
- bt_spinreleasewrite (&bt->mgr->latch[idx]);
+ bt_linkhash(bt, pool, page_no, hashidx);
+ bt_spinreleasewrite (&bt->mgr->latch[hashidx]);
return pool;
}
#else
victim = _InterlockedIncrement (&bt->mgr->evicted) - 1;
#endif
- victim %= bt->mgr->hashsize;
+ victim %= bt->mgr->poolmax;
+ pool = bt->mgr->pool + victim;
+ idx = (uint)(pool->basepage >> bt->mgr->seg_bits) % bt->mgr->hashsize;
+
+ if( !victim )
+ continue;
// try to get write lock
// skip entry if not obtained
- if( !bt_spinwritetry (&bt->mgr->latch[victim]) )
+ if( !bt_spinwritetry (&bt->mgr->latch[idx]) )
continue;
- // if pool entry is empty
- // or any pages are pinned
- // skip this entry
+ // skip this entry if
+ // page is pinned
+ // or clock bit is set
- if( !(pool = bt_findlru(bt, bt->mgr->hash[victim])) ) {
- bt_spinreleasewrite (&bt->mgr->latch[victim]);
+ if( pool->pin ) {
+#ifdef unix
+ __sync_fetch_and_and(&pool->pin, ~CLOCK_bit);
+#else
+ _InterlockedAnd16 (&pool->pin, ~CLOCK_bit);
+#endif
+ bt_spinreleasewrite (&bt->mgr->latch[idx]);
continue;
}
if( node = pool->hashprev )
node->hashnext = pool->hashnext;
else if( node = pool->hashnext )
- bt->mgr->hash[victim] = node->slot;
+ bt->mgr->hash[idx] = node->slot;
else
- bt->mgr->hash[victim] = 0;
+ bt->mgr->hash[idx] = 0;
if( node = pool->hashnext )
node->hashprev = pool->hashprev;
- bt_spinreleasewrite (&bt->mgr->latch[victim]);
+ bt_spinreleasewrite (&bt->mgr->latch[idx]);
// remove old file mapping
#ifdef unix
if( bt_mapsegment(bt, pool, page_no) )
return NULL;
- bt_linkhash(bt, pool, page_no, idx);
-#ifdef unix
- __sync_fetch_and_add(&pool->pin, 1);
-#else
- _InterlockedIncrement16 (&pool->pin);
-#endif
- bt_spinreleasewrite (&bt->mgr->latch[idx]);
+ bt_linkhash(bt, pool, page_no, hashidx);
+ bt_spinreleasewrite (&bt->mgr->latch[hashidx]);
return pool;
}
}
BtKey ptr;
#ifdef unix
- for( idx = 1; idx < bt->mgr->latchmgr->latchdeployed; idx++ ) {
+ for( idx = 1; idx <= bt->mgr->latchmgr->latchdeployed; idx++ ) {
set->latch = bt->mgr->latchsets + idx;
if( set->latch->pin ) {
fprintf(stderr, "latchset %d pinned for page %.6x\n", idx, set->latch->page_no);
found++;
else if( bt->err )
fprintf(stderr, "Error %d Syserr %d Line: %d\n", bt->err, errno, line), exit(0);
- else
- fprintf(stderr, "Unable to find key %.*s line %d\n", len, key, line);
len = 0;
}
else if( len < 255 )
// The memory mapping pool table buffer manager entry
typedef struct {
- unsigned long long int lru; // number of times accessed
uid basepage; // mapped base page number
char *map; // mapped memory pointer
ushort slot; // slot index in this array
#endif
} BtPool;
+#define CLOCK_bit 0x8000 // bit in pool->pin
+
// The loadpage interface object
typedef struct {
pool->hashprev = pool->hashnext = NULL;
pool->basepage = page_no & ~bt->mgr->poolmask;
- pool->lru = 1;
+ pool->pin = CLOCK_bit + 1;
if( slot = bt->mgr->hash[idx] ) {
node = bt->mgr->pool + slot;
bt->mgr->hash[idx] = pool->slot;
}
-// find best segment to evict from buffer pool
-
-BtPool *bt_findlru (BtDb *bt, uint hashslot)
-{
-unsigned long long int target = ~0LL;
-BtPool *pool = NULL, *node;
-
- if( !hashslot )
- return NULL;
-
- node = bt->mgr->pool + hashslot;
-
- // scan pool entries under hash table slot
-
- do {
- if( node->pin )
- continue;
- if( node->lru > target )
- continue;
- target = node->lru;
- pool = node;
- } while( node = node->hashnext );
-
- return pool;
-}
-
// map new buffer pool segment to virtual memory
BTERR bt_mapsegment(BtDb *bt, BtPool *pool, uid page_no)
BtPool *bt_pinpool(BtDb *bt, uid page_no)
{
+uint slot, hashidx, idx, victim;
BtPool *pool, *node, *next;
-uint slot, idx, victim;
// lock hash table chain
- idx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize;
- bt_spinreadlock (&bt->mgr->latch[idx]);
+ hashidx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize;
+ bt_spinwritelock (&bt->mgr->latch[hashidx]);
// look up in hash table
- if( pool = bt_findpool(bt, page_no, idx) ) {
-#ifdef unix
- __sync_fetch_and_add(&pool->pin, 1);
-#else
- _InterlockedIncrement16 (&pool->pin);
-#endif
- bt_spinreleaseread (&bt->mgr->latch[idx]);
- pool->lru++;
- return pool;
- }
-
- // upgrade to write lock
-
- bt_spinreleaseread (&bt->mgr->latch[idx]);
- bt_spinwritelock (&bt->mgr->latch[idx]);
-
- // try to find page in pool with write lock
-
- if( pool = bt_findpool(bt, page_no, idx) ) {
+ if( pool = bt_findpool(bt, page_no, hashidx) ) {
#ifdef unix
+ __sync_fetch_and_or(&pool->pin, CLOCK_bit);
__sync_fetch_and_add(&pool->pin, 1);
#else
+ _InterlockedOr16 (&pool->pin, CLOCK_bit);
_InterlockedIncrement16 (&pool->pin);
#endif
- bt_spinreleasewrite (&bt->mgr->latch[idx]);
- pool->lru++;
+ bt_spinreleasewrite (&bt->mgr->latch[hashidx]);
return pool;
}
if( bt_mapsegment(bt, pool, page_no) )
return NULL;
- bt_linkhash(bt, pool, page_no, idx);
-#ifdef unix
- __sync_fetch_and_add(&pool->pin, 1);
-#else
- _InterlockedIncrement16 (&pool->pin);
-#endif
- bt_spinreleasewrite (&bt->mgr->latch[idx]);
+ bt_linkhash(bt, pool, page_no, hashidx);
+ bt_spinreleasewrite (&bt->mgr->latch[hashidx]);
return pool;
}
#else
victim = _InterlockedIncrement (&bt->mgr->evicted) - 1;
#endif
- victim %= bt->mgr->hashsize;
+ victim %= bt->mgr->poolmax;
+ pool = bt->mgr->pool + victim;
+ idx = (uint)(pool->basepage >> bt->mgr->seg_bits) % bt->mgr->hashsize;
+
+ if( !victim )
+ continue;
// try to get write lock
// skip entry if not obtained
- if( !bt_spinwritetry (&bt->mgr->latch[victim]) )
+ if( !bt_spinwritetry (&bt->mgr->latch[idx]) )
continue;
- // if pool entry is empty
- // or any pages are pinned
- // skip this entry
+ // skip this entry if
+ // page is pinned
+ // or clock bit is set
- if( !(pool = bt_findlru(bt, bt->mgr->hash[victim])) ) {
- bt_spinreleasewrite (&bt->mgr->latch[victim]);
+ if( pool->pin ) {
+#ifdef unix
+ __sync_fetch_and_and(&pool->pin, ~CLOCK_bit);
+#else
+ _InterlockedAnd16 (&pool->pin, ~CLOCK_bit);
+#endif
+ bt_spinreleasewrite (&bt->mgr->latch[idx]);
continue;
}
if( node = pool->hashprev )
node->hashnext = pool->hashnext;
else if( node = pool->hashnext )
- bt->mgr->hash[victim] = node->slot;
+ bt->mgr->hash[idx] = node->slot;
else
- bt->mgr->hash[victim] = 0;
+ bt->mgr->hash[idx] = 0;
if( node = pool->hashnext )
node->hashprev = pool->hashprev;
- bt_spinreleasewrite (&bt->mgr->latch[victim]);
+ bt_spinreleasewrite (&bt->mgr->latch[idx]);
// remove old file mapping
#ifdef unix
if( bt_mapsegment(bt, pool, page_no) )
return NULL;
- bt_linkhash(bt, pool, page_no, idx);
-#ifdef unix
- __sync_fetch_and_add(&pool->pin, 1);
-#else
- _InterlockedIncrement16 (&pool->pin);
-#endif
- bt_spinreleasewrite (&bt->mgr->latch[idx]);
+ bt_linkhash(bt, pool, page_no, hashidx);
+ bt_spinreleasewrite (&bt->mgr->latch[hashidx]);
return pool;
}
}
fprintf(stderr, "Alloc page locked\n");
*(uint *)(bt->mgr->latchmgr->lock) = 0;
- for( idx = 1; idx < bt->mgr->latchmgr->latchdeployed; idx++ ) {
+ for( idx = 1; idx <= bt->mgr->latchmgr->latchdeployed; idx++ ) {
latch = bt->mgr->latchsets + idx;
if( *(uint *)latch->readwr )
fprintf(stderr, "latchset %d rwlocked for page %.8x\n", idx, latch->page_no);
found++;
else if( bt->err )
fprintf(stderr, "Error %d Syserr %d Line: %d\n", bt->err, errno, line), exit(0);
- else
- fprintf(stderr, "Unable to find key %.*s line %d\n", len, key, line);
len = 0;
}
else if( len < 255 )
// The memory mapping pool table buffer manager entry
typedef struct {
- unsigned long long int lru; // number of times accessed
uid basepage; // mapped base page number
char *map; // mapped memory pointer
ushort slot; // slot index in this array
#endif
} BtPool;
+#define CLOCK_bit 0x8000 // bit in pool->pin
+
// The loadpage interface object
typedef struct {
pool->hashprev = pool->hashnext = NULL;
pool->basepage = page_no & ~bt->mgr->poolmask;
- pool->lru = 1;
+ pool->pin = CLOCK_bit + 1;
if( slot = bt->mgr->hash[idx] ) {
node = bt->mgr->pool + slot;
bt->mgr->hash[idx] = pool->slot;
}
-// find best segment to evict from buffer pool
-
-BtPool *bt_findlru (BtDb *bt, uint hashslot)
-{
-unsigned long long int target = ~0LL;
-BtPool *pool = NULL, *node;
-
- if( !hashslot )
- return NULL;
-
- node = bt->mgr->pool + hashslot;
-
- // scan pool entries under hash table slot
-
- do {
- if( node->pin )
- continue;
- if( node->lru > target )
- continue;
- target = node->lru;
- pool = node;
- } while( node = node->hashnext );
-
- return pool;
-}
-
// map new buffer pool segment to virtual memory
BTERR bt_mapsegment(BtDb *bt, BtPool *pool, uid page_no)
BtPool *bt_pinpool(BtDb *bt, uid page_no)
{
+uint slot, hashidx, idx, victim;
BtPool *pool, *node, *next;
-uint slot, idx, victim;
// lock hash table chain
- idx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize;
- bt_spinreadlock (&bt->mgr->latch[idx], 1);
+ // take the chain latch in write mode: the miss paths below relink
+ // the chain via bt_linkhash and release it with bt_spinreleasewrite,
+ // and there is no longer a read-to-write upgrade step in between
+ hashidx = (uint)(page_no >> bt->mgr->seg_bits) % bt->mgr->hashsize;
+ bt_spinwritelock (&bt->mgr->latch[hashidx], 1);
// look up in hash table
- if( pool = bt_findpool(bt, page_no, idx) ) {
-#ifdef unix
- __sync_fetch_and_add(&pool->pin, 1);
-#else
- _InterlockedIncrement16 (&pool->pin);
-#endif
- bt_spinreleaseread (&bt->mgr->latch[idx], 1);
- pool->lru++;
- return pool;
- }
-
- // upgrade to write lock
-
- bt_spinreleaseread (&bt->mgr->latch[idx], 1);
- bt_spinwritelock (&bt->mgr->latch[idx], 1);
-
- // try to find page in pool with write lock
-
- if( pool = bt_findpool(bt, page_no, idx) ) {
+ // found: set the clock bit to mark the segment as recently used
+ // and add one pin before handing it back
+ if( pool = bt_findpool(bt, page_no, hashidx) ) {
#ifdef unix
+ __sync_fetch_and_or(&pool->pin, CLOCK_bit);
__sync_fetch_and_add(&pool->pin, 1);
#else
+ _InterlockedOr16 (&pool->pin, CLOCK_bit);
_InterlockedIncrement16 (&pool->pin);
#endif
- bt_spinreleasewrite (&bt->mgr->latch[idx], 1);
- pool->lru++;
+ bt_spinreleasewrite (&bt->mgr->latch[hashidx], 1);
return pool;
}
if( bt_mapsegment(bt, pool, page_no) )
return NULL;
- bt_linkhash(bt, pool, page_no, idx);
-#ifdef unix
- __sync_fetch_and_add(&pool->pin, 1);
-#else
- _InterlockedIncrement16 (&pool->pin);
-#endif
- bt_spinreleasewrite (&bt->mgr->latch[idx], 1);
+ bt_linkhash(bt, pool, page_no, hashidx);
+ bt_spinreleasewrite (&bt->mgr->latch[hashidx], 1);
return pool;
}
#else
victim = _InterlockedIncrement16 (&bt->mgr->evicted) - 1;
#endif
- victim %= bt->mgr->hashsize;
+ victim %= bt->mgr->poolmax;
+ pool = bt->mgr->pool + victim;
+ idx = (uint)(pool->basepage >> bt->mgr->seg_bits) % bt->mgr->hashsize;
+
+ if( !victim )
+ continue;
// try to get write lock
// skip entry if not obtained
- if( !bt_spinwritetry (&bt->mgr->latch[victim]) )
+ if( !bt_spinwritetry (&bt->mgr->latch[idx]) )
continue;
- // if pool entry is empty
- // or any pages are pinned
- // skip this entry
+ // skip this entry if
+ // page is pinned
+ // or clock bit is set
- if( !(pool = bt_findlru(bt, bt->mgr->hash[victim])) ) {
- bt_spinreleasewrite (&bt->mgr->latch[victim], 1);
+ if( pool->pin ) {
+#ifdef unix
+ __sync_fetch_and_and(&pool->pin, ~CLOCK_bit);
+#else
+ _InterlockedAnd16 (&pool->pin, ~CLOCK_bit);
+#endif
+ bt_spinreleasewrite (&bt->mgr->latch[idx], 1);
continue;
}
if( node = pool->hashprev )
node->hashnext = pool->hashnext;
else if( node = pool->hashnext )
- bt->mgr->hash[victim] = node->slot;
+ bt->mgr->hash[idx] = node->slot;
else
- bt->mgr->hash[victim] = 0;
+ bt->mgr->hash[idx] = 0;
if( node = pool->hashnext )
node->hashprev = pool->hashprev;
- bt_spinreleasewrite (&bt->mgr->latch[victim], 1);
+ bt_spinreleasewrite (&bt->mgr->latch[idx], 1);
// remove old file mapping
#ifdef unix
if( bt_mapsegment(bt, pool, page_no) )
return NULL;
- bt_linkhash(bt, pool, page_no, idx);
-#ifdef unix
- __sync_fetch_and_add(&pool->pin, 1);
-#else
- _InterlockedIncrement16 (&pool->pin);
-#endif
- bt_spinreleasewrite (&bt->mgr->latch[idx], 1);
+ bt_linkhash(bt, pool, page_no, hashidx);
+ bt_spinreleasewrite (&bt->mgr->latch[hashidx], 1);
return pool;
}
}
fprintf(stderr, "Alloc page locked\n");
*(uint *)(bt->mgr->latchmgr->lock) = 0;
- for( idx = 1; idx < bt->mgr->latchmgr->latchdeployed; idx++ ) {
+ for( idx = 1; idx <= bt->mgr->latchmgr->latchdeployed; idx++ ) {
latch = bt->mgr->latchsets + idx;
if( *(uint *)latch->readwr )
fprintf(stderr, "latchset %d rwlocked for page %.8x\n", idx, latch->page_no);
found++;
else if( bt->err )
fprintf(stderr, "Error %d Syserr %d Line: %d\n", bt->err, errno, line), exit(0);
- else
- fprintf(stderr, "Unable to find key %.*s line %d\n", len, key, line);
len = 0;
}
else if( len < 255 )