X-Git-Url: https://pd.if.org/git/?p=btree;a=blobdiff_plain;f=btree2t.c;h=1a544e956f88970c9157ffb25c2a9d58104a951e;hp=71387123e03410d29d71e13eecfd98920c89ef61;hb=HEAD;hpb=2898cdccd12c90cf665b9d75ab73be7c298965e3 diff --git a/btree2t.c b/btree2t.c index 7138712..1a544e9 100644 --- a/btree2t.c +++ b/btree2t.c @@ -1,6 +1,6 @@ -// btree version 2t +// btree version 2t sched_yield version of spinlocks // with reworked bt_deletekey code -// 25 FEB 2014 +// 12 MAR 2014 // author: karl malbrain, malbrain@cal.berkeley.edu @@ -56,7 +56,7 @@ typedef unsigned short ushort; typedef unsigned int uint; #endif -#define BT_latchtable 128 // number of latch manager slots +#define BT_latchtable 8192 // number of latch manager slots #define BT_ro 0x6f72 // ro #define BT_rw 0x7772 // rw @@ -91,12 +91,16 @@ typedef enum{ // grant write lock when share == 0 volatile typedef struct { - unsigned char mutex[1]; - unsigned char exclusive:1; - unsigned char pending:1; - ushort share; + ushort exclusive:1; + ushort pending:1; + ushort share:14; } BtSpinLatch; +#define XCL 1 +#define PEND 2 +#define BOTH 3 +#define SHARE 4 + // hash table entries typedef struct { @@ -355,9 +359,10 @@ BtKey ptr; ptr = keyptr(page, page->cnt); fprintf(stderr, "fence='%.*s'\n", ptr->len, ptr->key); fprintf(stderr, "right=%.8x\n", bt_getid(page->right)); + return bt->err = err; } -// Latch Manager +// Spin Latch Manager // wait until write lock mode is clear // and add 1 to the share count @@ -367,28 +372,20 @@ void bt_spinreadlock(BtSpinLatch *latch) ushort prev; do { - // obtain latch mutex #ifdef unix - if( __sync_lock_test_and_set(latch->mutex, 1) ) - continue; + prev = __sync_fetch_and_add ((ushort *)latch, SHARE); #else - if( _InterlockedExchange8(latch->mutex, 1) ) - continue; + prev = _InterlockedExchangeAdd16((ushort *)latch, SHARE); #endif // see if exclusive request is granted or pending - if( prev = !(latch->exclusive | latch->pending) ) - latch->share++; - + if( !(prev & BOTH) ) + return; #ifdef unix - *latch->mutex = 0; + prev = __sync_fetch_and_add ((ushort *)latch, -SHARE); #else - _InterlockedExchange8(latch->mutex, 0); + prev = _InterlockedExchangeAdd16((ushort *)latch, -SHARE); #endif - - if( prev ) - return; - #ifdef unix } while( sched_yield(), 1 ); #else @@ -400,27 +397,23 @@ ushort prev; void bt_spinwritelock(BtSpinLatch *latch) { -uint prev; +ushort prev; do { #ifdef unix - if( __sync_lock_test_and_set(latch->mutex, 1) ) - continue; + prev = __sync_fetch_and_or((ushort *)latch, PEND | XCL); #else - if( _InterlockedExchange8(latch->mutex, 1) ) - continue; + prev = _InterlockedOr16((ushort *)latch, PEND | XCL); #endif - if( prev = !(latch->share | latch->exclusive) ) - latch->exclusive = 1, latch->pending = 0; - else - latch->pending = 1; + if( !(prev & XCL) ) + if( !(prev & ~BOTH) ) + return; + else #ifdef unix - *latch->mutex = 0; + __sync_fetch_and_and ((ushort *)latch, ~XCL); #else - _InterlockedExchange8(latch->mutex, 0); + _InterlockedAnd16((ushort *)latch, ~XCL); #endif - if( prev ) - return; #ifdef unix } while( sched_yield(), 1 ); #else @@ -435,26 +428,25 @@ uint prev; int bt_spinwritetry(BtSpinLatch *latch) { -uint prev; +ushort prev; -#ifdef unix - if( __sync_lock_test_and_set(latch->mutex, 1) ) - return 0; +#ifdef unix + prev = __sync_fetch_and_or((ushort *)latch, XCL); #else - if( _InterlockedExchange8(latch->mutex, 1) ) - return 0; + prev = _InterlockedOr16((ushort *)latch, XCL); #endif // take write access if all bits are clear - if( prev = !(latch->exclusive | latch->share) ) - latch->exclusive = 1; - + if( !(prev & XCL) ) + if( !(prev & ~BOTH) ) + return 1; + else #ifdef unix - *latch->mutex = 0; + __sync_fetch_and_and ((ushort *)latch, ~XCL); #else - _InterlockedExchange8(latch->mutex, 0); + _InterlockedAnd16((ushort *)latch, ~XCL); #endif - return prev; + return 0; } // clear write mode @@ -462,17 +454,9 @@ uint prev; void bt_spinreleasewrite(BtSpinLatch *latch) { #ifdef unix - while( __sync_lock_test_and_set(latch->mutex, 1) ) - sched_yield(); -#else - while( _InterlockedExchange8(latch->mutex, 1) ) - SwitchToThread(); -#endif - latch->exclusive = 0; -#ifdef unix - *latch->mutex = 0; + __sync_fetch_and_and((ushort *)latch, ~BOTH); #else - _InterlockedExchange8(latch->mutex, 0); + _InterlockedAnd16((ushort *)latch, ~BOTH); #endif } @@ -481,17 +465,9 @@ void bt_spinreleasewrite(BtSpinLatch *latch) void bt_spinreleaseread(BtSpinLatch *latch) { #ifdef unix - while( __sync_lock_test_and_set(latch->mutex, 1) ) - sched_yield(); + __sync_fetch_and_add((ushort *)latch, -SHARE); #else - while( _InterlockedExchange8(latch->mutex, 1) ) - SwitchToThread(); -#endif - latch->share--; -#ifdef unix - *latch->mutex = 0; -#else - _InterlockedExchange8(latch->mutex, 0); + _InterlockedExchangeAdd16((ushort *)latch, -SHARE); #endif } @@ -671,6 +647,14 @@ BtLatchSet *latch; void bt_close (BtDb *bt) { BtHash *hash; +#ifdef unix + munmap (bt->latchsets, bt->latchmgr->nlatchpage * bt->page_size); + munmap (bt->latchmgr, bt->page_size); +#else + FlushViewOfFile(bt->latchmgr, 0); + UnmapViewOfFile(bt->latchmgr); + CloseHandle(bt->halloc); +#endif #ifdef unix // release mapped pages @@ -718,6 +702,10 @@ int flag; #ifndef unix SYSTEM_INFO sysinfo[1]; +OVERLAPPED ovl[1]; +uint len, flags; +#else +struct flock lock[1]; #endif // determine sanity of page size and buffer pool @@ -750,6 +738,30 @@ SYSTEM_INFO sysinfo[1]; cacheblk = sysinfo->dwAllocationGranularity; #endif +#ifdef unix + memset (lock, 0, sizeof(lock)); + + lock->l_type = F_WRLCK; + lock->l_len = sizeof(struct BtPage_); + lock->l_whence = 0; + + if( fcntl (bt->idx, F_SETLKW, lock) < 0 ) + return bt_close (bt), NULL; +#else + memset (ovl, 0, sizeof(ovl)); + len = sizeof(struct BtPage_); + + // use large offsets to + // simulate advisory locking + + ovl->OffsetHigh |= 0x80000000; + + if( mode == BtLockDelete || mode == BtLockWrite || mode == BtLockParent ) + flags |= LOCKFILE_EXCLUSIVE_LOCK; + + if( LockFileEx (bt->idx, flags, 0, len, 0L, ovl) ) + return bt_close (bt), NULL; +#endif #ifdef unix latchmgr = malloc (BT_maxpage); *amt = 0; @@ -799,7 +811,7 @@ SYSTEM_INFO sysinfo[1]; bt->hashsize = hashsize; - if( bt->nodemax = nodemax ) { + if( bt->nodemax = nodemax++ ) { #ifdef unix bt->nodes = calloc (nodemax, sizeof(BtHash)); bt->cache = calloc (hashsize, sizeof(ushort)); @@ -810,19 +822,27 @@ SYSTEM_INFO sysinfo[1]; bt->mapped_io = 1; } - if( size || *amt ) + if( size || *amt ) { goto btlatch; + } // initialize an empty b-tree with latch page, root page, page of leaves // and page(s) of latches memset (latchmgr, 0, 1 << bits); - nlatchpage = BT_latchtable / (bt->page_size / sizeof(BtLatchSet)) + 1; + + nlatchpage = BT_latchtable; + if( nlatchpage > nodemax ) + nlatchpage = nodemax; + nlatchpage *= sizeof(BtLatchSet); + nlatchpage += bt->page_size - 1; + nlatchpage /= bt->page_size; + bt_putid(latchmgr->alloc->right, MIN_lvl+1+nlatchpage); latchmgr->alloc->bits = bt->page_bits; latchmgr->nlatchpage = nlatchpage; - latchmgr->latchtotal = nlatchpage * (bt->page_size / sizeof(BtLatchSet)); + latchmgr->latchtotal = nlatchpage * bt->page_size / sizeof(BtLatchSet); // initialize latch manager @@ -892,6 +912,14 @@ SYSTEM_INFO sysinfo[1]; } btlatch: +#ifdef unix + lock->l_type = F_UNLCK; + if( fcntl (bt->idx, F_SETLK, lock) < 0 ) + return bt_close (bt), NULL; +#else + if( !UnlockFileEx (bt->idx, 0, sizeof(struct BtPage_), 0, ovl) ) + return bt_close (bt), NULL; +#endif #ifdef unix flag = PROT_READ | PROT_WRITE; bt->latchmgr = mmap (0, bt->page_size, flag, MAP_SHARED, bt->idx, ALLOC_page * bt->page_size); @@ -1152,12 +1180,12 @@ BtHash *hash; BtPage bt_linklru(BtDb *bt, BtHash *hash, uid page_no) { int flag; -off64_t off = (page_no & ~bt->hashmask) << bt->page_bits; +off64_t off = (page_no & ~(uid)bt->hashmask) << bt->page_bits; off64_t limit = off + ((bt->hashmask+1) << bt->page_bits); BtHash *node; memset(hash, 0, sizeof(BtHash)); - hash->page_no = (page_no & ~bt->hashmask); + hash->page_no = (page_no & ~(uid)bt->hashmask); bt_linkhash(bt, hash, page_no); if( node = hash->lrunext = bt->lrufirst ) @@ -1237,7 +1265,7 @@ BtPage page; #ifdef unix munmap (hash->page, (bt->hashmask+1) << bt->page_bits); #else - FlushViewOfFile(hash->page, 0); +// FlushViewOfFile(hash->page, 0); UnmapViewOfFile(hash->page); CloseHandle(hash->hmap); #endif @@ -1389,7 +1417,7 @@ uint mode, prevmode; // re-read and re-lock root after determining actual level of root - if( bt->page->lvl < drill) { + if( bt->page->lvl != drill) { if( bt->page_no != ROOT_page ) return bt->err = BTERR_struct, 0; @@ -2088,23 +2116,25 @@ uint cnt = 0; BtKey ptr; #ifdef unix - if( *(uint *)(bt->latchmgr->lock) ) + posix_fadvise( bt->idx, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + if( *(ushort *)(bt->latchmgr->lock) ) fprintf(stderr, "Alloc page locked\n"); - *(uint *)(bt->latchmgr->lock) = 0; + *(ushort *)(bt->latchmgr->lock) = 0; - for( idx = 1; idx < bt->latchmgr->latchdeployed; idx++ ) { + for( idx = 1; idx <= bt->latchmgr->latchdeployed; idx++ ) { latch = bt->latchsets + idx; - if( *(uint *)latch->readwr ) + if( *(ushort *)latch->readwr ) fprintf(stderr, "latchset %d rwlocked for page %.8x\n", idx, latch->page_no); - *(uint *)latch->readwr = 0; + *(ushort *)latch->readwr = 0; - if( *(uint *)latch->access ) + if( *(ushort *)latch->access ) fprintf(stderr, "latchset %d accesslocked for page %.8x\n", idx, latch->page_no); - *(uint *)latch->access = 0; + *(ushort *)latch->access = 0; - if( *(uint *)latch->parent ) + if( *(ushort *)latch->parent ) fprintf(stderr, "latchset %d parentlocked for page %.8x\n", idx, latch->page_no); - *(uint *)latch->parent = 0; + *(ushort *)latch->parent = 0; if( latch->pin ) { fprintf(stderr, "latchset %d pinned for page %.8x\n", idx, latch->page_no); @@ -2113,16 +2143,16 @@ BtKey ptr; } for( hashidx = 0; hashidx < bt->latchmgr->latchhash; hashidx++ ) { - if( *(uint *)(bt->latchmgr->table[hashidx].latch) ) + if( *(ushort *)(bt->latchmgr->table[hashidx].latch) ) fprintf(stderr, "hash entry %d locked\n", hashidx); - *(uint *)(bt->latchmgr->table[hashidx].latch) = 0; + *(ushort *)(bt->latchmgr->table[hashidx].latch) = 0; if( idx = bt->latchmgr->table[hashidx].slot ) do { latch = bt->latchsets + idx; - if( *(uint *)latch->busy ) + if( *(ushort *)latch->busy ) fprintf(stderr, "latchset %d busylocked for page %.8x\n", idx, latch->page_no); - *(uint *)latch->busy = 0; + *(ushort *)latch->busy = 0; if( latch->hash != hashidx ) fprintf(stderr, "latchset %d wrong hashidx\n", idx); if( latch->pin ) @@ -2134,7 +2164,20 @@ BtKey ptr; page_no = LEAF_page; while( page_no < bt_getid(bt->latchmgr->alloc->right) ) { - pread (bt->idx, bt->frame, bt->page_size, page_no << bt->page_bits); + off64_t off = page_no << bt->page_bits; +#ifdef unix + pread (bt->idx, bt->frame, bt->page_size, off); +#else + DWORD amt[1]; + + SetFilePointer (bt->idx, (long)off, (long*)(&off)+1, FILE_BEGIN); + + if( !ReadFile(bt->idx, bt->frame, bt->page_size, amt, NULL)) + fprintf(stderr, "page %.8x unable to read\n", page_no); + + if( *amt < bt->page_size ) + fprintf(stderr, "page %.8x unable to read\n", page_no); +#endif if( !bt->frame->free ) { for( idx = 0; idx++ < bt->frame->cnt - 1; ) { ptr = keyptr(bt->frame, idx+1); @@ -2150,7 +2193,6 @@ BtKey ptr; page_no = next; } return cnt - 1; -#endif } #ifndef unix @@ -2338,15 +2380,30 @@ FILE *in; break; case 's': - scan++; + fprintf(stderr, "started scaning\n"); + cnt = len = key[0] = 0; - case 'c': - cnt = 0; + if( slot = bt_startkey (bt, key, len) ) + slot--; + else + fprintf(stderr, "Error %d in StartKey. Syserror: %d\n", bt->err, errno), exit(0); + + while( slot = bt_nextkey (bt, slot) ) { + ptr = bt_key(bt, slot); + fwrite (ptr->key, ptr->len, 1, stdout); + fputc ('\n', stdout); + cnt++; + } - fprintf(stderr, "started reading\n"); + fprintf(stderr, " Total keys read %d\n", cnt - 1); + break; + + case 'c': + fprintf(stderr, "started counting\n"); next = bt->latchmgr->nlatchpage + LATCH_page; page_no = LEAF_page; + cnt = 0; while( page_no < bt_getid(bt->latchmgr->alloc->right) ) { uid off = page_no << bt->page_bits; @@ -2358,10 +2415,10 @@ FILE *in; SetFilePointer (bt->idx, (long)off, (long*)(&off)+1, FILE_BEGIN); if( !ReadFile(bt->idx, bt->frame, bt->page_size, amt, NULL)) - return bt->err = BTERR_map; + fprintf (stderr, "unable to read page %.8x", page_no); if( *amt < bt->page_size ) - return bt->err = BTERR_map; + fprintf (stderr, "unable to read page %.8x", page_no); #endif if( !bt->frame->free && !bt->frame->lvl ) cnt += bt->frame->act;