-// btree version 2t
+// btree version 2t sched_yield version of spinlocks
// with reworked bt_deletekey code
-// 25 FEB 2014
+// 12 MAR 2014
// author: karl malbrain, malbrain@cal.berkeley.edu
typedef unsigned int uint;
#endif
-#define BT_latchtable 128 // number of latch manager slots
+#define BT_latchtable 8192 // number of latch manager slots
#define BT_ro 0x6f72 // ro
#define BT_rw 0x7772 // rw
// grant write lock when share == 0
volatile typedef struct {
- unsigned char mutex[1];
- unsigned char exclusive:1;
- unsigned char pending:1;
- ushort share;
+ ushort exclusive:1;
+ ushort pending:1;
+ ushort share:14;
} BtSpinLatch;
+#define XCL 1
+#define PEND 2
+#define BOTH 3
+#define SHARE 4
+
// hash table entries
typedef struct {
ptr = keyptr(page, page->cnt);
fprintf(stderr, "fence='%.*s'\n", ptr->len, ptr->key);
fprintf(stderr, "right=%.8x\n", bt_getid(page->right));
+ return bt->err = err;
}
-// Latch Manager
+// Spin Latch Manager
// wait until write lock mode is clear
// and add 1 to the share count
ushort prev;
do {
- // obtain latch mutex
#ifdef unix
- if( __sync_lock_test_and_set(latch->mutex, 1) )
- continue;
+ prev = __sync_fetch_and_add ((ushort *)latch, SHARE);
#else
- if( _InterlockedExchange8(latch->mutex, 1) )
- continue;
+ prev = _InterlockedExchangeAdd16((ushort *)latch, SHARE);
#endif
// see if exclusive request is granted or pending
- if( prev = !(latch->exclusive | latch->pending) )
- latch->share++;
-
+ if( !(prev & BOTH) )
+ return;
#ifdef unix
- *latch->mutex = 0;
+ prev = __sync_fetch_and_add ((ushort *)latch, -SHARE);
#else
- _InterlockedExchange8(latch->mutex, 0);
+ prev = _InterlockedExchangeAdd16((ushort *)latch, -SHARE);
#endif
-
- if( prev )
- return;
-
#ifdef unix
} while( sched_yield(), 1 );
#else
void bt_spinwritelock(BtSpinLatch *latch)
{
-uint prev;
+ushort prev;
do {
#ifdef unix
- if( __sync_lock_test_and_set(latch->mutex, 1) )
- continue;
+ prev = __sync_fetch_and_or((ushort *)latch, PEND | XCL);
#else
- if( _InterlockedExchange8(latch->mutex, 1) )
- continue;
+ prev = _InterlockedOr16((ushort *)latch, PEND | XCL);
#endif
- if( prev = !(latch->share | latch->exclusive) )
- latch->exclusive = 1, latch->pending = 0;
- else
- latch->pending = 1;
+ if( !(prev & XCL) )
+ if( !(prev & ~BOTH) )
+ return;
+ else
#ifdef unix
- *latch->mutex = 0;
+ __sync_fetch_and_and ((ushort *)latch, ~XCL);
#else
- _InterlockedExchange8(latch->mutex, 0);
+ _InterlockedAnd16((ushort *)latch, ~XCL);
#endif
- if( prev )
- return;
#ifdef unix
} while( sched_yield(), 1 );
#else
int bt_spinwritetry(BtSpinLatch *latch)
{
-uint prev;
+ushort prev;
-#ifdef unix
- if( __sync_lock_test_and_set(latch->mutex, 1) )
- return 0;
+#ifdef unix
+ prev = __sync_fetch_and_or((ushort *)latch, XCL);
#else
- if( _InterlockedExchange8(latch->mutex, 1) )
- return 0;
+ prev = _InterlockedOr16((ushort *)latch, XCL);
#endif
// take write access if all bits are clear
- if( prev = !(latch->exclusive | latch->share) )
- latch->exclusive = 1;
-
+ if( !(prev & XCL) )
+ if( !(prev & ~BOTH) )
+ return 1;
+ else
#ifdef unix
- *latch->mutex = 0;
+ __sync_fetch_and_and ((ushort *)latch, ~XCL);
#else
- _InterlockedExchange8(latch->mutex, 0);
+ _InterlockedAnd16((ushort *)latch, ~XCL);
#endif
- return prev;
+ return 0;
}
// clear write mode
void bt_spinreleasewrite(BtSpinLatch *latch)
{
#ifdef unix
- while( __sync_lock_test_and_set(latch->mutex, 1) )
- sched_yield();
-#else
- while( _InterlockedExchange8(latch->mutex, 1) )
- SwitchToThread();
-#endif
- latch->exclusive = 0;
-#ifdef unix
- *latch->mutex = 0;
+ __sync_fetch_and_and((ushort *)latch, ~BOTH);
#else
- _InterlockedExchange8(latch->mutex, 0);
+ _InterlockedAnd16((ushort *)latch, ~BOTH);
#endif
}
void bt_spinreleaseread(BtSpinLatch *latch)
{
#ifdef unix
- while( __sync_lock_test_and_set(latch->mutex, 1) )
- sched_yield();
+ __sync_fetch_and_add((ushort *)latch, -SHARE);
#else
- while( _InterlockedExchange8(latch->mutex, 1) )
- SwitchToThread();
-#endif
- latch->share--;
-#ifdef unix
- *latch->mutex = 0;
-#else
- _InterlockedExchange8(latch->mutex, 0);
+ _InterlockedExchangeAdd16((ushort *)latch, -SHARE);
#endif
}
void bt_close (BtDb *bt)
{
BtHash *hash;
+#ifdef unix
+ munmap (bt->latchsets, bt->latchmgr->nlatchpage * bt->page_size);
+ munmap (bt->latchmgr, bt->page_size);
+#else
+ FlushViewOfFile(bt->latchmgr, 0);
+ UnmapViewOfFile(bt->latchmgr);
+ CloseHandle(bt->halloc);
+#endif
#ifdef unix
// release mapped pages
#ifndef unix
SYSTEM_INFO sysinfo[1];
+OVERLAPPED ovl[1];
+uint len, flags;
+#else
+struct flock lock[1];
#endif
// determine sanity of page size and buffer pool
cacheblk = sysinfo->dwAllocationGranularity;
#endif
+#ifdef unix
+ memset (lock, 0, sizeof(lock));
+
+ lock->l_type = F_WRLCK;
+ lock->l_len = sizeof(struct BtPage_);
+ lock->l_whence = 0;
+
+ if( fcntl (bt->idx, F_SETLKW, lock) < 0 )
+ return bt_close (bt), NULL;
+#else
+ memset (ovl, 0, sizeof(ovl));
+ len = sizeof(struct BtPage_);
+
+ // use large offsets to
+ // simulate advisory locking
+
+ ovl->OffsetHigh |= 0x80000000;
+
+ if( mode == BtLockDelete || mode == BtLockWrite || mode == BtLockParent )
+ flags |= LOCKFILE_EXCLUSIVE_LOCK;
+
+ if( LockFileEx (bt->idx, flags, 0, len, 0L, ovl) )
+ return bt_close (bt), NULL;
+#endif
#ifdef unix
latchmgr = malloc (BT_maxpage);
*amt = 0;
bt->hashsize = hashsize;
- if( bt->nodemax = nodemax ) {
+ if( bt->nodemax = nodemax++ ) {
#ifdef unix
bt->nodes = calloc (nodemax, sizeof(BtHash));
bt->cache = calloc (hashsize, sizeof(ushort));
bt->mapped_io = 1;
}
- if( size || *amt )
+ if( size || *amt ) {
goto btlatch;
+ }
// initialize an empty b-tree with latch page, root page, page of leaves
// and page(s) of latches
memset (latchmgr, 0, 1 << bits);
- nlatchpage = BT_latchtable / (bt->page_size / sizeof(BtLatchSet)) + 1;
+
+ nlatchpage = BT_latchtable;
+ if( nlatchpage > nodemax )
+ nlatchpage = nodemax;
+ nlatchpage *= sizeof(BtLatchSet);
+ nlatchpage += bt->page_size - 1;
+ nlatchpage /= bt->page_size;
+
bt_putid(latchmgr->alloc->right, MIN_lvl+1+nlatchpage);
latchmgr->alloc->bits = bt->page_bits;
latchmgr->nlatchpage = nlatchpage;
- latchmgr->latchtotal = nlatchpage * (bt->page_size / sizeof(BtLatchSet));
+ latchmgr->latchtotal = nlatchpage * bt->page_size / sizeof(BtLatchSet);
// initialize latch manager
}
btlatch:
+#ifdef unix
+ lock->l_type = F_UNLCK;
+ if( fcntl (bt->idx, F_SETLK, lock) < 0 )
+ return bt_close (bt), NULL;
+#else
+ if( !UnlockFileEx (bt->idx, 0, sizeof(struct BtPage_), 0, ovl) )
+ return bt_close (bt), NULL;
+#endif
#ifdef unix
flag = PROT_READ | PROT_WRITE;
bt->latchmgr = mmap (0, bt->page_size, flag, MAP_SHARED, bt->idx, ALLOC_page * bt->page_size);
BtPage bt_linklru(BtDb *bt, BtHash *hash, uid page_no)
{
int flag;
-off64_t off = (page_no & ~bt->hashmask) << bt->page_bits;
+off64_t off = (page_no & ~(uid)bt->hashmask) << bt->page_bits;
off64_t limit = off + ((bt->hashmask+1) << bt->page_bits);
BtHash *node;
memset(hash, 0, sizeof(BtHash));
- hash->page_no = (page_no & ~bt->hashmask);
+ hash->page_no = (page_no & ~(uid)bt->hashmask);
bt_linkhash(bt, hash, page_no);
if( node = hash->lrunext = bt->lrufirst )
#ifdef unix
munmap (hash->page, (bt->hashmask+1) << bt->page_bits);
#else
- FlushViewOfFile(hash->page, 0);
+// FlushViewOfFile(hash->page, 0);
UnmapViewOfFile(hash->page);
CloseHandle(hash->hmap);
#endif
// re-read and re-lock root after determining actual level of root
- if( bt->page->lvl < drill) {
+ if( bt->page->lvl != drill) {
if( bt->page_no != ROOT_page )
return bt->err = BTERR_struct, 0;
BtKey ptr;
#ifdef unix
- if( *(uint *)(bt->latchmgr->lock) )
+ posix_fadvise( bt->idx, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+ if( *(ushort *)(bt->latchmgr->lock) )
fprintf(stderr, "Alloc page locked\n");
- *(uint *)(bt->latchmgr->lock) = 0;
+ *(ushort *)(bt->latchmgr->lock) = 0;
- for( idx = 1; idx < bt->latchmgr->latchdeployed; idx++ ) {
+ for( idx = 1; idx <= bt->latchmgr->latchdeployed; idx++ ) {
latch = bt->latchsets + idx;
- if( *(uint *)latch->readwr )
+ if( *(ushort *)latch->readwr )
fprintf(stderr, "latchset %d rwlocked for page %.8x\n", idx, latch->page_no);
- *(uint *)latch->readwr = 0;
+ *(ushort *)latch->readwr = 0;
- if( *(uint *)latch->access )
+ if( *(ushort *)latch->access )
fprintf(stderr, "latchset %d accesslocked for page %.8x\n", idx, latch->page_no);
- *(uint *)latch->access = 0;
+ *(ushort *)latch->access = 0;
- if( *(uint *)latch->parent )
+ if( *(ushort *)latch->parent )
fprintf(stderr, "latchset %d parentlocked for page %.8x\n", idx, latch->page_no);
- *(uint *)latch->parent = 0;
+ *(ushort *)latch->parent = 0;
if( latch->pin ) {
fprintf(stderr, "latchset %d pinned for page %.8x\n", idx, latch->page_no);
}
for( hashidx = 0; hashidx < bt->latchmgr->latchhash; hashidx++ ) {
- if( *(uint *)(bt->latchmgr->table[hashidx].latch) )
+ if( *(ushort *)(bt->latchmgr->table[hashidx].latch) )
fprintf(stderr, "hash entry %d locked\n", hashidx);
- *(uint *)(bt->latchmgr->table[hashidx].latch) = 0;
+ *(ushort *)(bt->latchmgr->table[hashidx].latch) = 0;
if( idx = bt->latchmgr->table[hashidx].slot ) do {
latch = bt->latchsets + idx;
- if( *(uint *)latch->busy )
+ if( *(ushort *)latch->busy )
fprintf(stderr, "latchset %d busylocked for page %.8x\n", idx, latch->page_no);
- *(uint *)latch->busy = 0;
+ *(ushort *)latch->busy = 0;
if( latch->hash != hashidx )
fprintf(stderr, "latchset %d wrong hashidx\n", idx);
if( latch->pin )
page_no = LEAF_page;
while( page_no < bt_getid(bt->latchmgr->alloc->right) ) {
- pread (bt->idx, bt->frame, bt->page_size, page_no << bt->page_bits);
+ off64_t off = page_no << bt->page_bits;
+#ifdef unix
+ pread (bt->idx, bt->frame, bt->page_size, off);
+#else
+ DWORD amt[1];
+
+ SetFilePointer (bt->idx, (long)off, (long*)(&off)+1, FILE_BEGIN);
+
+ if( !ReadFile(bt->idx, bt->frame, bt->page_size, amt, NULL))
+ fprintf(stderr, "page %.8x unable to read\n", page_no);
+
+ if( *amt < bt->page_size )
+ fprintf(stderr, "page %.8x unable to read\n", page_no);
+#endif
if( !bt->frame->free ) {
for( idx = 0; idx++ < bt->frame->cnt - 1; ) {
ptr = keyptr(bt->frame, idx+1);
page_no = next;
}
return cnt - 1;
-#endif
}
#ifndef unix
break;
case 's':
- scan++;
+ fprintf(stderr, "started scaning\n");
+ cnt = len = key[0] = 0;
- case 'c':
- cnt = 0;
+ if( slot = bt_startkey (bt, key, len) )
+ slot--;
+ else
+ fprintf(stderr, "Error %d in StartKey. Syserror: %d\n", bt->err, errno), exit(0);
+
+ while( slot = bt_nextkey (bt, slot) ) {
+ ptr = bt_key(bt, slot);
+ fwrite (ptr->key, ptr->len, 1, stdout);
+ fputc ('\n', stdout);
+ cnt++;
+ }
- fprintf(stderr, "started reading\n");
+ fprintf(stderr, " Total keys read %d\n", cnt - 1);
+ break;
+
+ case 'c':
+ fprintf(stderr, "started counting\n");
next = bt->latchmgr->nlatchpage + LATCH_page;
page_no = LEAF_page;
+ cnt = 0;
while( page_no < bt_getid(bt->latchmgr->alloc->right) ) {
uid off = page_no << bt->page_bits;
SetFilePointer (bt->idx, (long)off, (long*)(&off)+1, FILE_BEGIN);
if( !ReadFile(bt->idx, bt->frame, bt->page_size, amt, NULL))
- return bt->err = BTERR_map;
+ fprintf (stderr, "unable to read page %.8x", page_no);
if( *amt < bt->page_size )
- return bt->err = BTERR_map;
+ fprintf (stderr, "unable to read page %.8x", page_no);
#endif
if( !bt->frame->free && !bt->frame->lvl )
cnt += bt->frame->act;