]> pd.if.org Git - btree/blobdiff - btree2t.c
include newest lite weight latch code.
[btree] / btree2t.c
index 71387123e03410d29d71e13eecfd98920c89ef61..bb0e50f06a7ac976440afd177e5b75263624e7b8 100644 (file)
--- a/btree2t.c
+++ b/btree2t.c
@@ -1,6 +1,6 @@
-// btree version 2t
+// btree version 2t  sched_yield version of spinlocks
 //     with reworked bt_deletekey code
-// 25 FEB 2014
+// 12 MAR 2014
 
 // author: karl malbrain, malbrain@cal.berkeley.edu
 
@@ -91,12 +91,16 @@ typedef enum{
 // grant write lock when share == 0
 
 volatile typedef struct {
-       unsigned char mutex[1];
-       unsigned char exclusive:1;
-       unsigned char pending:1;
-       ushort share;
+       ushort exclusive:1;
+       ushort pending:1;
+       ushort share:14;
 } BtSpinLatch;
 
+#define XCL 1
+#define PEND 2
+#define BOTH 3
+#define SHARE 4
+
 //  hash table entries
 
 typedef struct {
@@ -355,9 +359,10 @@ BtKey ptr;
        ptr = keyptr(page, page->cnt);
        fprintf(stderr, "fence='%.*s'\n", ptr->len, ptr->key);
        fprintf(stderr, "right=%.8x\n", bt_getid(page->right));
+       return bt->err = err;
 }
 
-//     Latch Manager
+//     Spin Latch Manager
 
 //     wait until write lock mode is clear
 //     and add 1 to the share count
@@ -367,28 +372,20 @@ void bt_spinreadlock(BtSpinLatch *latch)
 ushort prev;
 
   do {
-       //      obtain latch mutex
 #ifdef unix
-       if( __sync_lock_test_and_set(latch->mutex, 1) )
-               continue;
+       prev = __sync_fetch_and_add ((ushort *)latch, SHARE);
 #else
-       if( _InterlockedExchange8(latch->mutex, 1) )
-               continue;
+       prev = _InterlockedExchangeAdd16((ushort *)latch, SHARE);
 #endif
        //  see if exclusive request is granted or pending
 
-       if( prev = !(latch->exclusive | latch->pending) )
-               latch->share++;
-
+       if( !(prev & BOTH) )
+               return;
 #ifdef unix
-       *latch->mutex = 0;
+       prev = __sync_fetch_and_add ((ushort *)latch, -SHARE);
 #else
-       _InterlockedExchange8(latch->mutex, 0);
+       prev = _InterlockedExchangeAdd16((ushort *)latch, -SHARE);
 #endif
-
-       if( prev )
-               return;
-
 #ifdef  unix
   } while( sched_yield(), 1 );
 #else
@@ -400,27 +397,23 @@ ushort prev;
 
 void bt_spinwritelock(BtSpinLatch *latch)
 {
-uint prev;
+ushort prev;
 
   do {
 #ifdef  unix
-       if( __sync_lock_test_and_set(latch->mutex, 1) )
-               continue;
+       prev = __sync_fetch_and_or((ushort *)latch, PEND | XCL);
 #else
-       if( _InterlockedExchange8(latch->mutex, 1) )
-               continue;
+       prev = _InterlockedOr16((ushort *)latch, PEND | XCL);
 #endif
-       if( prev = !(latch->share | latch->exclusive) )
-               latch->exclusive = 1, latch->pending = 0;
-       else
-               latch->pending = 1;
+       if( !(prev & XCL) )
+         if( !(prev & ~BOTH) )
+               return;
+         else
 #ifdef unix
-       *latch->mutex = 0;
+               __sync_fetch_and_and ((ushort *)latch, ~XCL);
 #else
-       _InterlockedExchange8(latch->mutex, 0);
+               _InterlockedAnd16((ushort *)latch, ~XCL);
 #endif
-       if( prev )
-               return;
 #ifdef  unix
   } while( sched_yield(), 1 );
 #else
@@ -435,26 +428,25 @@ uint prev;
 
 int bt_spinwritetry(BtSpinLatch *latch)
 {
-uint prev;
+ushort prev;
 
-#ifdef unix
-       if( __sync_lock_test_and_set(latch->mutex, 1) )
-               return 0;
+#ifdef  unix
+       prev = __sync_fetch_and_or((ushort *)latch, XCL);
 #else
-       if( _InterlockedExchange8(latch->mutex, 1) )
-               return 0;
+       prev = _InterlockedOr16((ushort *)latch, XCL);
 #endif
        //      take write access if all bits are clear
 
-       if( prev = !(latch->exclusive | latch->share) )
-               latch->exclusive = 1;
-
+       if( !(prev & XCL) )
+         if( !(prev & ~BOTH) )
+               return 1;
+         else
 #ifdef unix
-       *latch->mutex = 0;
+               __sync_fetch_and_and ((ushort *)latch, ~XCL);
 #else
-       _InterlockedExchange8(latch->mutex, 0);
+               _InterlockedAnd16((ushort *)latch, ~XCL);
 #endif
-       return prev;
+       return 0;
 }
 
 //     clear write mode
@@ -462,17 +454,9 @@ uint prev;
 void bt_spinreleasewrite(BtSpinLatch *latch)
 {
 #ifdef unix
-       while( __sync_lock_test_and_set(latch->mutex, 1) )
-               sched_yield();
+       __sync_fetch_and_and((ushort *)latch, ~BOTH);
 #else
-       while( _InterlockedExchange8(latch->mutex, 1) )
-               SwitchToThread();
-#endif
-       latch->exclusive = 0;
-#ifdef unix
-       *latch->mutex = 0;
-#else
-       _InterlockedExchange8(latch->mutex, 0);
+       _InterlockedAnd16((ushort *)latch, ~BOTH);
 #endif
 }
 
@@ -481,17 +465,9 @@ void bt_spinreleasewrite(BtSpinLatch *latch)
 void bt_spinreleaseread(BtSpinLatch *latch)
 {
 #ifdef unix
-       while( __sync_lock_test_and_set(latch->mutex, 1) )
-               sched_yield();
-#else
-       while( _InterlockedExchange8(latch->mutex, 1) )
-               SwitchToThread();
-#endif
-       latch->share--;
-#ifdef unix
-       *latch->mutex = 0;
+       __sync_fetch_and_add((ushort *)latch, -SHARE);
 #else
-       _InterlockedExchange8(latch->mutex, 0);
+       _InterlockedExchangeAdd16((ushort *)latch, -SHARE);
 #endif
 }
 
@@ -671,6 +647,14 @@ BtLatchSet *latch;
 void bt_close (BtDb *bt)
 {
 BtHash *hash;
+#ifdef unix
+       munmap (bt->latchsets, bt->latchmgr->nlatchpage * bt->page_size);
+       munmap (bt->latchmgr, bt->page_size);
+#else
+       FlushViewOfFile(bt->latchmgr, 0);
+       UnmapViewOfFile(bt->latchmgr);
+       CloseHandle(bt->halloc);
+#endif
 #ifdef unix
        // release mapped pages
 
@@ -718,6 +702,10 @@ int flag;
 
 #ifndef unix
 SYSTEM_INFO sysinfo[1];
+OVERLAPPED ovl[1];
+uint len, flags;
+#else
+struct flock lock[1];
 #endif
 
        // determine sanity of page size and buffer pool
@@ -750,6 +738,30 @@ SYSTEM_INFO sysinfo[1];
        cacheblk = sysinfo->dwAllocationGranularity;
 #endif
 
+#ifdef unix
+       memset (lock, 0, sizeof(lock));
+
+       lock->l_type = F_WRLCK;
+       lock->l_len = sizeof(struct BtPage_);
+       lock->l_whence = 0;
+
+       if( fcntl (bt->idx, F_SETLKW, lock) < 0 )
+               return bt_close (bt), NULL;
+#else
+       memset (ovl, 0, sizeof(ovl));
+       len = sizeof(struct BtPage_);
+
+       //      use large offsets to
+       //      simulate advisory locking
+
+       ovl->OffsetHigh |= 0x80000000;
+
+       if( mode == BtLockDelete || mode == BtLockWrite || mode == BtLockParent )
+               flags |= LOCKFILE_EXCLUSIVE_LOCK;
+
+       if( LockFileEx (bt->idx, flags, 0, len, 0L, ovl) )
+               return bt_close (bt), NULL;
+#endif 
 #ifdef unix
        latchmgr = malloc (BT_maxpage);
        *amt = 0;
@@ -810,8 +822,9 @@ SYSTEM_INFO sysinfo[1];
          bt->mapped_io = 1;
        }
 
-       if( size || *amt )
+       if( size || *amt ) {
                goto btlatch;
+       }
 
        // initialize an empty b-tree with latch page, root page, page of leaves
        // and page(s) of latches
@@ -892,6 +905,14 @@ SYSTEM_INFO sysinfo[1];
        }
 
 btlatch:
+#ifdef unix
+       lock->l_type = F_UNLCK;
+       if( fcntl (bt->idx, F_SETLK, lock) < 0 )
+               return bt_close (bt), NULL;
+#else
+       if( !UnlockFileEx (bt->idx, 0, sizeof(struct BtPage_), 0, ovl) )
+               return bt_close (bt), NULL;
+#endif
 #ifdef unix
        flag = PROT_READ | PROT_WRITE;
        bt->latchmgr = mmap (0, bt->page_size, flag, MAP_SHARED, bt->idx, ALLOC_page * bt->page_size);
@@ -1389,7 +1410,7 @@ uint mode, prevmode;
 
        // re-read and re-lock root after determining actual level of root
 
-       if( bt->page->lvl < drill) {
+       if( bt->page->lvl != drill) {
                if( bt->page_no != ROOT_page )
                        return bt->err = BTERR_struct, 0;
                        
@@ -2088,23 +2109,25 @@ uint cnt = 0;
 BtKey ptr;
 
 #ifdef unix
-       if( *(uint *)(bt->latchmgr->lock) )
+       posix_fadvise( bt->idx, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+       if( *(ushort *)(bt->latchmgr->lock) )
                fprintf(stderr, "Alloc page locked\n");
-       *(uint *)(bt->latchmgr->lock) = 0;
+       *(ushort *)(bt->latchmgr->lock) = 0;
 
-       for( idx = 1; idx < bt->latchmgr->latchdeployed; idx++ ) {
+       for( idx = 1; idx <= bt->latchmgr->latchdeployed; idx++ ) {
                latch = bt->latchsets + idx;
-               if( *(uint *)latch->readwr )
+               if( *(ushort *)latch->readwr )
                        fprintf(stderr, "latchset %d rwlocked for page %.8x\n", idx, latch->page_no);
-               *(uint *)latch->readwr = 0;
+               *(ushort *)latch->readwr = 0;
 
-               if( *(uint *)latch->access )
+               if( *(ushort *)latch->access )
                        fprintf(stderr, "latchset %d accesslocked for page %.8x\n", idx, latch->page_no);
-               *(uint *)latch->access = 0;
+               *(ushort *)latch->access = 0;
 
-               if( *(uint *)latch->parent )
+               if( *(ushort *)latch->parent )
                        fprintf(stderr, "latchset %d parentlocked for page %.8x\n", idx, latch->page_no);
-               *(uint *)latch->parent = 0;
+               *(ushort *)latch->parent = 0;
 
                if( latch->pin ) {
                        fprintf(stderr, "latchset %d pinned for page %.8x\n", idx, latch->page_no);
@@ -2113,16 +2136,16 @@ BtKey ptr;
        }
 
        for( hashidx = 0; hashidx < bt->latchmgr->latchhash; hashidx++ ) {
-         if( *(uint *)(bt->latchmgr->table[hashidx].latch) )
+         if( *(ushort *)(bt->latchmgr->table[hashidx].latch) )
                        fprintf(stderr, "hash entry %d locked\n", hashidx);
 
-         *(uint *)(bt->latchmgr->table[hashidx].latch) = 0;
+         *(ushort *)(bt->latchmgr->table[hashidx].latch) = 0;
 
          if( idx = bt->latchmgr->table[hashidx].slot ) do {
                latch = bt->latchsets + idx;
-               if( *(uint *)latch->busy )
+               if( *(ushort *)latch->busy )
                        fprintf(stderr, "latchset %d busylocked for page %.8x\n", idx, latch->page_no);
-               *(uint *)latch->busy = 0;
+               *(ushort *)latch->busy = 0;
                if( latch->hash != hashidx )
                        fprintf(stderr, "latchset %d wrong hashidx\n", idx);
                if( latch->pin )
@@ -2134,7 +2157,20 @@ BtKey ptr;
        page_no = LEAF_page;
 
        while( page_no < bt_getid(bt->latchmgr->alloc->right) ) {
-               pread (bt->idx, bt->frame, bt->page_size, page_no << bt->page_bits);
+       off64_t off = page_no << bt->page_bits;
+#ifdef unix
+               pread (bt->idx, bt->frame, bt->page_size, off);
+#else
+               DWORD amt[1];
+
+                 SetFilePointer (bt->idx, (long)off, (long*)(&off)+1, FILE_BEGIN);
+
+                 if( !ReadFile(bt->idx, bt->frame, bt->page_size, amt, NULL))
+                       fprintf(stderr, "page %.8x unable to read\n", page_no);
+
+                 if( *amt <  bt->page_size )
+                       fprintf(stderr, "page %.8x unable to read\n", page_no);
+#endif
                if( !bt->frame->free ) {
                 for( idx = 0; idx++ < bt->frame->cnt - 1; ) {
                  ptr = keyptr(bt->frame, idx+1);
@@ -2150,7 +2186,6 @@ BtKey ptr;
                page_no = next;
        }
        return cnt - 1;
-#endif
 }
 
 #ifndef unix
@@ -2338,15 +2373,30 @@ FILE *in;
                break;
 
        case 's':
-               scan++;
+               fprintf(stderr, "started scaning\n");
+               cnt = len = key[0] = 0;
 
-       case 'c':
-         cnt = 0;
+               if( slot = bt_startkey (bt, key, len) )
+                 slot--;
+               else
+                 fprintf(stderr, "Error %d in StartKey. Syserror: %d\n", bt->err, errno), exit(0);
+
+               while( slot = bt_nextkey (bt, slot) ) {
+                       ptr = bt_key(bt, slot);
+                       fwrite (ptr->key, ptr->len, 1, stdout);
+                       fputc ('\n', stdout);
+                       cnt++;
+               }
 
-         fprintf(stderr, "started reading\n");
+               fprintf(stderr, " Total keys read %d\n", cnt - 1);
+               break;
+
+       case 'c':
+         fprintf(stderr, "started counting\n");
 
          next = bt->latchmgr->nlatchpage + LATCH_page;
          page_no = LEAF_page;
+         cnt = 0;
 
          while( page_no < bt_getid(bt->latchmgr->alloc->right) ) {
          uid off = page_no << bt->page_bits;
@@ -2358,10 +2408,10 @@ FILE *in;
                SetFilePointer (bt->idx, (long)off, (long*)(&off)+1, FILE_BEGIN);
 
                if( !ReadFile(bt->idx, bt->frame, bt->page_size, amt, NULL))
-                       return bt->err = BTERR_map;
+                       fprintf (stderr, "unable to read page %.8x", page_no);
 
                if( *amt <  bt->page_size )
-                       return bt->err = BTERR_map;
+                       fprintf (stderr, "unable to read page %.8x", page_no);
 #endif
                if( !bt->frame->free && !bt->frame->lvl )
                        cnt += bt->frame->act;