ushort serving[1];
} RWLock;
+// write only queue lock
+
+typedef struct {
+ ushort ticket[1];
+ ushort serving[1];
+ ushort tid;
+ ushort dup;
+} WOLock;
+
#define PHID 0x1
#define PRES 0x2
#define MASK 0x3
uid page_no; // latch set page number
RWLock readwr[1]; // read/write page lock
RWLock access[1]; // Access Intent/Page delete
- RWLock parent[1]; // Posting of fence key in parent
- RWLock atomic[1]; // Atomic update in progress
+ WOLock parent[1]; // Posting of fence key in parent
+ WOLock atomic[1]; // Atomic update in progress
uint split; // right split page atomic insert
uint entry; // entry slot in latch table
uint next; // next entry in hash table chain
uint latchtotal; // number of page latch entries
uint latchhash; // number of latch hash table slots
uint latchvictim; // next latch entry to examine
+ ushort thread_no[1]; // next thread number
BtHashEntry *hashtable; // the buffer pool hash table entries
BtLatchSet *latchsets; // mapped latch set from buffer pool
unsigned char *pagepool; // mapped to the buffer pool pages
int found; // last delete or insert was found
int err; // last error
int reads, writes; // number of reads and writes from the btree
+ ushort thread_no; // thread number
} BtDb;
typedef enum {
#endif
}
+// Write-Only Queue Lock
+
+void WriteOLock (WOLock *lock, ushort tid)
+{
+ushort tix;
+
+ if( lock->tid == tid ) {
+ lock->dup++;
+ return;
+ }
+#ifdef unix
+ tix = __sync_fetch_and_add (lock->ticket, 1);
+#else
+ tix = _InterlockedExchangeAdd16 (lock->ticket, 1);
+#endif
+ // wait for our ticket to come up
+
+ while( tix != lock->serving[0] )
+#ifdef unix
+ sched_yield();
+#else
+ SwitchToThread ();
+#endif
+ lock->tid = tid;
+}
+
+void WriteORelease (WOLock *lock)
+{
+ if( lock->dup ) {
+ lock->dup--;
+ return;
+ }
+
+ lock->tid = 0;
+ lock->serving[0]++;
+}
+
// Phase-Fair reader/writer lock implementation
void WriteLock (RWLock *lock)
#endif
bt->frame = (BtPage)bt->mem;
bt->cursor = (BtPage)(bt->mem + 1 * mgr->page_size);
+#ifdef unix
+ bt->thread_no = __sync_fetch_and_add (mgr->thread_no, 1) + 1;
+#else
+ bt->thread_no = _InterlockedIncrement16(mgr->thread_no, 1);
+#endif
return bt;
}
// place write, read, or parent lock on requested page_no.
-void bt_lockpage(BtLock mode, BtLatchSet *latch)
+void bt_lockpage(BtDb *bt, BtLock mode, BtLatchSet *latch)
{
switch( mode ) {
case BtLockRead:
WriteLock (latch->access);
break;
case BtLockParent:
- WriteLock (latch->parent);
+ WriteOLock (latch->parent, bt->thread_no);
break;
case BtLockAtomic:
- WriteLock (latch->atomic);
+ WriteOLock (latch->atomic, bt->thread_no);
+ break;
+ case BtLockAtomic | BtLockRead:
+ WriteOLock (latch->atomic, bt->thread_no);
+ ReadLock (latch->readwr);
break;
}
}
// remove write, read, or parent lock on requested page
-void bt_unlockpage(BtLock mode, BtLatchSet *latch)
+void bt_unlockpage(BtDb *bt, BtLock mode, BtLatchSet *latch)
{
switch( mode ) {
case BtLockRead:
WriteRelease (latch->access);
break;
case BtLockParent:
- WriteRelease (latch->parent);
+ WriteORelease (latch->parent);
break;
case BtLockAtomic:
- WriteRelease (latch->atomic);
+ WriteORelease (latch->atomic);
+ break;
+ case BtLockAtomic | BtLockRead:
+ WriteORelease (latch->atomic);
+ ReadRelease (latch->readwr);
break;
}
}
// obtain access lock using lock chaining with Access mode
if( page_no > ROOT_page )
- bt_lockpage(BtLockAccess, set->latch);
+ bt_lockpage(bt, BtLockAccess, set->latch);
set->page = bt_mappage (bt, set->latch);
// release & unpin parent or left sibling page
if( prevpage ) {
- bt_unlockpage(prevmode, prevlatch);
+ bt_unlockpage(bt, prevmode, prevlatch);
bt_unpinlatch (prevlatch);
prevpage = 0;
}
// obtain mode lock using lock chaining through AccessLock
- bt_lockpage(mode, set->latch);
+ bt_lockpage(bt, mode, set->latch);
if( set->page->free )
return bt->err = BTERR_struct, 0;
if( page_no > ROOT_page )
- bt_unlockpage(BtLockAccess, set->latch);
+ bt_unlockpage(bt, BtLockAccess, set->latch);
// re-read and re-lock root after determining actual level of root
drill = set->page->lvl;
if( lock != BtLockRead && drill == lvl ) {
- bt_unlockpage(mode, set->latch);
+ bt_unlockpage(bt, mode, set->latch);
bt_unpinlatch (set->latch);
continue;
}
// find key on page at this level
// and descend to requested level
- if( set->page->kill )
- goto slideright;
-
- if( slot = bt_findslot (set->page, key, len) ) {
+ if( !set->page->kill )
+ if( slot = bt_findslot (set->page, key, len) ) {
if( drill == lvl )
return slot;
page_no = bt_getid(valptr(set->page, slot)->value);
drill--;
continue;
- }
+ }
// or slide right into next page
-slideright:
page_no = bt_getid(set->page->right);
-
} while( page_no );
// return error on end of right chain
// unlock released page
- bt_unlockpage (BtLockDelete, set->latch);
- bt_unlockpage (BtLockWrite, set->latch);
+ bt_unlockpage (bt, BtLockDelete, set->latch);
+ bt_unlockpage (bt, BtLockWrite, set->latch);
+ bt_unpinlatch (set->latch);
// unlock allocation page
ptr = keyptr(set->page, set->page->cnt);
memcpy (leftkey, ptr, ptr->len + sizeof(BtKey));
- bt_lockpage (BtLockParent, set->latch);
- bt_unlockpage (BtLockWrite, set->latch);
+ bt_lockpage (bt, BtLockParent, set->latch);
+ bt_unlockpage (bt, BtLockWrite, set->latch);
// insert new (now smaller) fence key
if( bt_deletekey (bt, ptr->key, ptr->len, lvl+1) )
return bt->err;
- bt_unlockpage (BtLockParent, set->latch);
+ bt_unlockpage (bt, BtLockParent, set->latch);
bt_unpinlatch(set->latch);
return 0;
}
else
return bt->err;
- bt_lockpage (BtLockDelete, child->latch);
- bt_lockpage (BtLockWrite, child->latch);
+ bt_lockpage (bt, BtLockDelete, child->latch);
+ bt_lockpage (bt, BtLockWrite, child->latch);
memcpy (root->page, child->page, bt->mgr->page_size);
root->latch->dirty = 1;
bt_freepage (bt, child);
- bt_unpinlatch (child->latch);
} while( root->page->lvl > 1 && root->page->act == 1 );
- bt_unlockpage (BtLockWrite, root->latch);
+ bt_unlockpage (bt, BtLockWrite, root->latch);
bt_unpinlatch (root->latch);
return 0;
}
else
return 0;
- bt_lockpage (BtLockWrite, right->latch);
+ bt_lockpage (bt, BtLockWrite, right->latch);
// cache copy of key to update
right->latch->dirty = 1;
right->page->kill = 1;
- bt_lockpage (BtLockParent, right->latch);
- bt_unlockpage (BtLockWrite, right->latch);
+ bt_lockpage (bt, BtLockParent, right->latch);
+ bt_unlockpage (bt, BtLockWrite, right->latch);
- bt_lockpage (BtLockParent, set->latch);
- bt_unlockpage (BtLockWrite, set->latch);
+ bt_lockpage (bt, BtLockParent, set->latch);
+ bt_unlockpage (bt, BtLockWrite, set->latch);
// redirect higher key directly to our new node contents
// obtain delete and write locks to right node
- bt_unlockpage (BtLockParent, right->latch);
- bt_lockpage (BtLockDelete, right->latch);
- bt_lockpage (BtLockWrite, right->latch);
+ bt_unlockpage (bt, BtLockParent, right->latch);
+ bt_lockpage (bt, BtLockDelete, right->latch);
+ bt_lockpage (bt, BtLockWrite, right->latch);
bt_freepage (bt, right);
- bt_unpinlatch (right->latch);
- bt_unlockpage (BtLockParent, set->latch);
+ bt_unlockpage (bt, BtLockParent, set->latch);
bt_unpinlatch (set->latch);
- bt->found = 1;
return 0;
}
if( bt_fixfence (bt, set, lvl) )
return bt->err;
else
- return bt->found = found, 0;
+ return 0;
// do we need to collapse root?
if( bt_collapseroot (bt, set) )
return bt->err;
else
- return bt->found = found, 0;
+ return 0;
// delete empty page
return bt_deletepage (bt, set);
set->latch->dirty = 1;
- bt_unlockpage(BtLockWrite, set->latch);
+ bt_unlockpage(bt, BtLockWrite, set->latch);
bt_unpinlatch (set->latch);
- return bt->found = found, 0;
+ return 0;
}
BtKey *bt_foundkey (BtDb *bt)
// obtain access lock using lock chaining with Access mode
- bt_lockpage(BtLockAccess, set->latch);
+ bt_lockpage(bt, BtLockAccess, set->latch);
- bt_unlockpage(BtLockRead, prevlatch);
+ bt_unlockpage(bt, BtLockRead, prevlatch);
bt_unpinlatch (prevlatch);
- bt_lockpage(BtLockRead, set->latch);
- bt_unlockpage(BtLockAccess, set->latch);
+ bt_lockpage(bt, BtLockRead, set->latch);
+ bt_unlockpage(bt, BtLockAccess, set->latch);
return 1;
}
} while( slot = bt_findnext (bt, set, slot) );
- bt_unlockpage (BtLockRead, set->latch);
+ bt_unlockpage (bt, BtLockRead, set->latch);
bt_unpinlatch (set->latch);
return ret;
}
while( cnt++ < max ) {
if( cnt == slot )
newslot = idx + 2;
- if( cnt < max && slotptr(bt->frame,cnt)->dead )
+
+ if( cnt < max || bt->frame->lvl )
+ if( slotptr(bt->frame,cnt)->dead )
continue;
// copy the value across
// make a librarian slot
- if( idx ) {
- slotptr(page, ++idx)->off = nxt;
- slotptr(page, idx)->type = Librarian;
- slotptr(page, idx)->dead = 1;
- }
+ slotptr(page, ++idx)->off = nxt;
+ slotptr(page, idx)->type = Librarian;
+ slotptr(page, idx)->dead = 1;
// set up the slot
// release and unpin root pages
- bt_unlockpage(BtLockWrite, root->latch);
+ bt_unlockpage(bt, BtLockWrite, root->latch);
bt_unpinlatch (root->latch);
bt_unpinlatch (right);
idx = 0;
while( cnt++ < max ) {
- if( slotptr(set->page, cnt)->dead && cnt < max )
+ if( cnt < max || set->page->lvl )
+ if( slotptr(set->page, cnt)->dead )
continue;
+
src = valptr(set->page, cnt);
nxt -= src->len + sizeof(BtVal);
memcpy ((unsigned char *)bt->frame + nxt, src, src->len + sizeof(BtVal));
// add librarian slot
- if( idx ) {
- slotptr(bt->frame, ++idx)->off = nxt;
- slotptr(bt->frame, idx)->type = Librarian;
- slotptr(bt->frame, idx)->dead = 1;
- }
+ slotptr(bt->frame, ++idx)->off = nxt;
+ slotptr(bt->frame, idx)->type = Librarian;
+ slotptr(bt->frame, idx)->dead = 1;
// add actual slot
// add librarian slot
- if( idx ) {
- slotptr(set->page, ++idx)->off = nxt;
- slotptr(set->page, idx)->type = Librarian;
- slotptr(set->page, idx)->dead = 1;
- }
+ slotptr(set->page, ++idx)->off = nxt;
+ slotptr(set->page, idx)->type = Librarian;
+ slotptr(set->page, idx)->dead = 1;
// add actual slot
// insert new fences in their parent pages
- bt_lockpage (BtLockParent, right);
+ bt_lockpage (bt, BtLockParent, right);
- bt_lockpage (BtLockParent, set->latch);
- bt_unlockpage (BtLockWrite, set->latch);
+ bt_lockpage (bt, BtLockParent, set->latch);
+ bt_unlockpage (bt, BtLockWrite, set->latch);
// insert new fence for reformulated left block of smaller keys
if( bt_insertkey (bt, ptr->key, ptr->len, lvl+1, value, BtId, 1) )
return bt->err;
- bt_unlockpage (BtLockParent, set->latch);
+ bt_unlockpage (bt, BtLockParent, set->latch);
bt_unpinlatch (set->latch);
- bt_unlockpage (BtLockParent, right);
+ bt_unlockpage (bt, BtLockParent, right);
bt_unpinlatch (right);
return 0;
}
node->dead = 0;
if( release ) {
- bt_unlockpage (BtLockWrite, set->latch);
+ bt_unlockpage (bt, BtLockWrite, set->latch);
bt_unpinlatch (set->latch);
}
slotptr(set->page, slot)->dead = 0;
val->len = vallen;
memcpy (val->value, value, vallen);
- bt_unlockpage(BtLockWrite, set->latch);
+ bt_unlockpage(bt, BtLockWrite, set->latch);
bt_unpinlatch (set->latch);
return 0;
}
ptr->len = keylen;
slotptr(set->page, slot)->off = set->page->min;
- bt_unlockpage(BtLockWrite, set->latch);
+ bt_unlockpage(bt, BtLockWrite, set->latch);
bt_unpinlatch (set->latch);
return 0;
}
uint entry; // latch table entry number
uint slot:31; // page slot number
uint reuse:1; // reused previous page
-} AtomicMod;
+} AtomicTxn;
typedef struct {
uid page_no; // page number for split leaf
unsigned char leafkey[BT_keyarray];
} AtomicKey;
-// find and load leaf page for given key
-// leave page Atomic locked and Read locked.
-
-int bt_atomicload (BtDb *bt, BtPageSet *set, unsigned char *key, uint len)
-{
-BtLatchSet *prevlatch;
-uid page_no;
-uint slot;
-
- // find level zero page
-
- if( !(slot = bt_loadpage (bt, set, key, len, 1, BtLockRead)) )
- return 0;
-
- // find next non-dead entry on this page
- // it will be the fence key if nothing else
-
- while( slotptr(set->page, slot)->dead )
- if( slot++ < set->page->cnt )
- continue;
- else
- return bt->err = BTERR_struct, 0;
-
- page_no = bt_getid(valptr(set->page, slot)->value);
- prevlatch = set->latch;
-
- while( page_no ) {
- if( set->latch = bt_pinlatch (bt, page_no, 1) )
- set->page = bt_mappage (bt, set->latch);
- else
- return 0;
-
- if( set->page->free || set->page->lvl )
- return bt->err = BTERR_struct, 0;
-
- // obtain read lock using lock chaining with Access mode
- // release & unpin parent/left sibling page
-
- bt_lockpage(BtLockAccess, set->latch);
-
- bt_unlockpage(BtLockRead, prevlatch);
- bt_unpinlatch (prevlatch);
-
- bt_lockpage(BtLockRead, set->latch);
- bt_unlockpage(BtLockAccess, set->latch);
-
- // find key on page at this level
- // and descend to requested level
-
- if( !set->page->kill )
- if( !bt_getid (set->page->right) || keycmp (keyptr(set->page, set->page->cnt), key, len) >= 0 ) {
- bt_unlockpage(BtLockRead, set->latch);
- bt_lockpage(BtLockAtomic, set->latch);
- bt_lockpage(BtLockAccess, set->latch);
- bt_lockpage(BtLockRead, set->latch);
- bt_unlockpage(BtLockAccess, set->latch);
-
- if( slot = bt_findslot (set->page, key, len) )
- return slot;
-
- bt_unlockpage(BtLockAtomic, set->latch);
- }
-
- // slide right into next page
-
- page_no = bt_getid(set->page->right);
- prevlatch = set->latch;
- }
-
- // return error on end of right chain
-
- bt->err = BTERR_struct;
- return 0; // return error
-}
-
// determine actual page where key is located
// return slot number
-uint bt_atomicpage (BtDb *bt, BtPage source, AtomicMod *locks, uint src, BtPageSet *set)
+uint bt_atomicpage (BtDb *bt, BtPage source, AtomicTxn *locks, uint src, BtPageSet *set)
{
BtKey *key = keyptr(source,src);
uint slot = locks[src].slot;
return slot;
}
- // is locks->reuse set?
- // if so, find where our key
- // is located on previous page or split pages
+ // is locks->reuse set? or was slot zeroed?
+ // if so, find where our key is located
+ // on current page or pages split on
+ // same page txn operations.
do {
set->latch = bt->mgr->latchsets + entry;
set->page = bt_mappage (bt, set->latch);
if( slot = bt_findslot(set->page, key->key, key->len) ) {
+ if( slotptr(set->page, slot)->type == Librarian )
+ slot++;
if( locks[src].reuse )
locks[src].entry = entry;
return slot;
return 0;
}
-BTERR bt_atomicinsert (BtDb *bt, BtPage source, AtomicMod *locks, uint src)
+BTERR bt_atomicinsert (BtDb *bt, BtPage source, AtomicTxn *locks, uint src)
{
BtKey *key = keyptr(source, src);
BtVal *val = valptr(source, src);
BtLatchSet *latch;
BtPageSet set[1];
-uint entry;
+uint entry, slot;
- while( locks[src].slot = bt_atomicpage (bt, source, locks, src, set) ) {
- if( locks[src].slot = bt_cleanpage(bt, set, key->len, locks[src].slot, val->len) )
- return bt_insertslot (bt, set, locks[src].slot, key->key, key->len, val->value, val->len, slotptr(source,src)->type, 0);
+ while( slot = bt_atomicpage (bt, source, locks, src, set) ) {
+ if( slot = bt_cleanpage(bt, set, key->len, slot, val->len) )
+ return bt_insertslot (bt, set, slot, key->key, key->len, val->value, val->len, slotptr(source,src)->type, 0);
if( entry = bt_splitpage (bt, set) )
latch = bt->mgr->latchsets + entry;
// splice right page into split chain
// and WriteLock it.
+ bt_lockpage(bt, BtLockWrite, latch);
latch->split = set->latch->split;
set->latch->split = entry;
- bt_lockpage(BtLockWrite, latch);
+ locks[src].slot = 0;
}
return bt->err = BTERR_atomic;
}
-BTERR bt_atomicdelete (BtDb *bt, BtPage source, AtomicMod *locks, uint src)
+BTERR bt_atomicdelete (BtDb *bt, BtPage source, AtomicTxn *locks, uint src)
{
BtKey *key = keyptr(source, src);
uint idx, entry, slot;
BtVal *val;
if( slot = bt_atomicpage (bt, source, locks, src, set) )
+ ptr = keyptr(set->page, slot);
+ else
+ return bt->err = BTERR_struct;
+
+ if( !keycmp (ptr, key->key, key->len) )
+ if( !slotptr(set->page, slot)->dead )
slotptr(set->page, slot)->dead = 1;
+ else
+ return 0;
else
- return bt->err = BTERR_struct;
+ return 0;
- ptr = keyptr(set->page, slot);
val = valptr(set->page, slot);
-
set->page->garbage += ptr->len + val->len + sizeof(BtKey) + sizeof(BtVal);
set->latch->dirty = 1;
set->page->act--;
+ bt->found++;
return 0;
}
-uint bt_parentmatch (AtomicKey *head, uint entry)
+// delete an empty master page for a transaction
+
+// note that the far right page never empties because
+// it always contains (at least) the infinite stopper key
+// and that all pages that don't contain any keys are
+// deleted, or are being held under Atomic lock.
+
+BTERR bt_atomicfree (BtDb *bt, BtPageSet *prev)
{
-uint cnt = 0;
+BtPageSet right[1], temp[1];
+unsigned char value[BtId];
+uid right_page_no;
+BtKey *ptr;
+
+ bt_lockpage(bt, BtLockWrite, prev->latch);
+
+ // grab the right sibling
+
+ if( right->latch = bt_pinlatch(bt, bt_getid (prev->page->right), 1) )
+ right->page = bt_mappage (bt, right->latch);
+ else
+ return bt->err;
+
+ bt_lockpage(bt, BtLockAtomic, right->latch);
+ bt_lockpage(bt, BtLockWrite, right->latch);
- if( head )
- do if( head->entry == entry )
- head->nounlock = 1, cnt++;
- while( head = head->next );
+ // and pull contents over empty page
+ // while preserving master's left link
- return cnt;
+ memcpy (right->page->left, prev->page->left, BtId);
+ memcpy (prev->page, right->page, bt->mgr->page_size);
+
+ // forward seekers to old right sibling
+ // to new page location in set
+
+ bt_putid (right->page->right, prev->latch->page_no);
+ right->latch->dirty = 1;
+ right->page->kill = 1;
+
+ // remove pointer to right page for searchers by
+ // changing right fence key to point to the master page
+
+ ptr = keyptr(right->page,right->page->cnt);
+ bt_putid (value, prev->latch->page_no);
+
+ if( bt_insertkey (bt, ptr->key, ptr->len, 1, value, BtId, 1) )
+ return bt->err;
+
+ // now that master page is in good shape we can
+ // remove its locks.
+
+ bt_unlockpage (bt, BtLockAtomic, prev->latch);
+ bt_unlockpage (bt, BtLockWrite, prev->latch);
+
+ // fix master's right sibling's left pointer
+ // to remove scanner's poiner to the right page
+
+ if( right_page_no = bt_getid (prev->page->right) ) {
+ if( temp->latch = bt_pinlatch (bt, right_page_no, 1) )
+ temp->page = bt_mappage (bt, temp->latch);
+
+ bt_lockpage (bt, BtLockWrite, temp->latch);
+ bt_putid (temp->page->left, prev->latch->page_no);
+ temp->latch->dirty = 1;
+
+ bt_unlockpage (bt, BtLockWrite, temp->latch);
+ bt_unpinlatch (temp->latch);
+ } else { // master is now the far right page
+ bt_spinwritelock (bt->mgr->lock);
+ bt_putid (bt->mgr->pagezero->alloc->left, prev->latch->page_no);
+ bt_spinreleasewrite(bt->mgr->lock);
+ }
+
+ // now that there are no pointers to the right page
+ // we can delete it after the last read access occurs
+
+ bt_unlockpage (bt, BtLockWrite, right->latch);
+ bt_unlockpage (bt, BtLockAtomic, right->latch);
+ bt_lockpage (bt, BtLockDelete, right->latch);
+ bt_lockpage (bt, BtLockWrite, right->latch);
+ bt_freepage (bt, right);
+ return 0;
}
// atomic modification of a batch of keys.
unsigned char value[BtId];
BtKey *key, *ptr, *key2;
BtLatchSet *latch;
-AtomicMod *locks;
+AtomicTxn *locks;
int result = 0;
BtSlot temp[1];
BtPage page;
uid right;
int type;
- locks = calloc (source->cnt + 1, sizeof(AtomicMod));
+ locks = calloc (source->cnt + 1, sizeof(AtomicTxn));
head = NULL;
tail = NULL;
if( samepage = src > 1 )
if( samepage = !bt_getid(set->page->right) || keycmp (keyptr(set->page, set->page->cnt), key->key, key->len) >= 0 )
slot = bt_findslot(set->page, key->key, key->len);
- else // release read on previous page
- bt_unlockpage(BtLockRead, set->latch);
+ else
+ bt_unlockpage(bt, BtLockRead, set->latch);
if( !slot )
- if( slot = bt_atomicload(bt, set, key->key, key->len) )
+ if( slot = bt_loadpage(bt, set, key->key, key->len, 0, BtLockRead | BtLockAtomic) )
set->latch->split = 0;
else
return -1;
// return constraint violation if key already exists
- bt_unlockpage(BtLockRead, set->latch);
+ bt_unlockpage(bt, BtLockRead, set->latch);
result = src;
while( src ) {
if( locks[src].entry ) {
set->latch = bt->mgr->latchsets + locks[src].entry;
- bt_unlockpage(BtLockAtomic, set->latch);
+ bt_unlockpage(bt, BtLockAtomic, set->latch);
bt_unpinlatch (set->latch);
}
src--;
// unlock last loadpage lock
- if( source->cnt > 1 )
- bt_unlockpage(BtLockRead, set->latch);
+ if( source->cnt )
+ bt_unlockpage(bt, BtLockRead, set->latch);
// obtain write lock for each master page
if( locks[src].reuse )
continue;
else
- bt_lockpage(BtLockWrite, bt->mgr->latchsets + locks[src].entry);
+ bt_lockpage(bt, BtLockWrite, bt->mgr->latchsets + locks[src].entry);
// insert or delete each key
// process any splits or merges
samepage = src;
// pick-up all splits from master page
+ // each one is already WriteLocked.
entry = prev->latch->split;
set->page = bt_mappage (bt, set->latch);
entry = set->latch->split;
- // delete empty master page
+ // delete empty master page by undoing its split
+ // (this is potentially another empty page)
+ // note that there are no new left pointers yet
if( !prev->page->act ) {
memcpy (set->page->left, prev->page->left, BtId);
memcpy (prev->page, set->page, bt->mgr->page_size);
- bt_lockpage (BtLockDelete, set->latch);
+ bt_lockpage (bt, BtLockDelete, set->latch);
bt_freepage (bt, set);
- bt_unpinlatch (set->latch);
prev->latch->dirty = 1;
continue;
if( !set->page->act ) {
memcpy (prev->page->right, set->page->right, BtId);
prev->latch->split = set->latch->split;
- bt_lockpage (BtLockDelete, set->latch);
+ bt_lockpage (bt, BtLockDelete, set->latch);
bt_freepage (bt, set);
- bt_unpinlatch (set->latch);
continue;
}
tail = leaf;
+ // splice in the left link into the split page
+
bt_putid (set->page->left, prev->latch->page_no);
- bt_lockpage(BtLockParent, prev->latch);
- bt_unlockpage(BtLockWrite, prev->latch);
+ bt_lockpage(bt, BtLockParent, prev->latch);
+ bt_unlockpage(bt, BtLockWrite, prev->latch);
*prev = *set;
}
- // update left pointer in next right page
- // if we did any now non-empty splits
+ // update left pointer in next right page from last split page
+ // (if all splits were reversed, latch->split == 0)
if( latch->split ) {
- // fix left pointer in master's original right sibling
- // or set rightmost page in page zero
+ // fix left pointer in master's original (now split)
+ // far right sibling or set rightmost page in page zero
if( right = bt_getid (prev->page->right) ) {
- if( set->latch = bt_pinlatch (bt, bt_getid(prev->page->right), 1) )
+ if( set->latch = bt_pinlatch (bt, right, 1) )
set->page = bt_mappage (bt, set->latch);
else
return -1;
- bt_lockpage (BtLockWrite, set->latch);
+ bt_lockpage (bt, BtLockWrite, set->latch);
bt_putid (set->page->left, prev->latch->page_no);
set->latch->dirty = 1;
- bt_unlockpage (BtLockWrite, set->latch);
+ bt_unlockpage (bt, BtLockWrite, set->latch);
bt_unpinlatch (set->latch);
- } else
+ } else { // prev is rightmost page
+ bt_spinwritelock (bt->mgr->lock);
bt_putid (bt->mgr->pagezero->alloc->left, prev->latch->page_no);
+ bt_spinreleasewrite(bt->mgr->lock);
+ }
// Process last page split in chain
tail = leaf;
- bt_lockpage(BtLockParent, prev->latch);
- bt_unlockpage(BtLockWrite, prev->latch);
- bt_unlockpage(BtLockAtomic, latch);
+ bt_lockpage(bt, BtLockParent, prev->latch);
+ bt_unlockpage(bt, BtLockWrite, prev->latch);
+
+ // remove atomic lock on master page
+
+ bt_unlockpage(bt, BtLockAtomic, latch);
continue;
}
// finished if prev page occupied (either master or final split)
if( prev->page->act ) {
- bt_unlockpage(BtLockWrite, latch);
- bt_unlockpage(BtLockAtomic, latch);
+ bt_unlockpage(bt, BtLockWrite, latch);
+ bt_unlockpage(bt, BtLockAtomic, latch);
bt_unpinlatch(latch);
continue;
}
- // any splits were reversed, and the
+ // any and all splits were reversed, and the
// master page located in prev is empty, delete it
// by pulling over master's right sibling.
- // Delete empty master's fence
-
- ptr = keyptr(prev->page,prev->page->cnt);
- leaf = calloc (sizeof(AtomicKey), 1);
-
- memcpy (leaf->leafkey, ptr, ptr->len + sizeof(BtKey));
- leaf->page_no = prev->latch->page_no;
- leaf->entry = prev->latch->entry;
- leaf->type = 1;
-
- if( tail )
- tail->next = leaf;
- else
- head = leaf;
- tail = leaf;
-
- bt_lockpage(BtLockParent, prev->latch);
- bt_unlockpage(BtLockWrite, prev->latch);
+ // Remove empty master's fence key
- // grab master's right sibling
+ ptr = keyptr(prev->page,prev->page->cnt);
- if( set->latch = bt_pinlatch(bt, bt_getid (prev->page->right), 1) )
- set->page = bt_mappage (bt, set->latch);
- else
+ if( bt_deletekey (bt, ptr->key, ptr->len, 1) )
return -1;
- bt_lockpage(BtLockWrite, set->latch);
-
- // pull contents over empty page
+ // perform the remainder of the delete
+ // from the FIFO queue
- memcpy (set->page->left, prev->page->left, BtId);
- memcpy (prev->page, set->page, bt->mgr->page_size);
-
- bt_putid (set->page->right, prev->latch->page_no);
- set->latch->dirty = 1;
- set->page->kill = 1;
-
- // add new fence key for new master page contents, delete
- // right sibling after parent posts the new master fence.
-
- ptr = keyptr(set->page,set->page->cnt);
leaf = calloc (sizeof(AtomicKey), 1);
memcpy (leaf->leafkey, ptr, ptr->len + sizeof(BtKey));
leaf->page_no = prev->latch->page_no;
- leaf->entry = set->latch->entry;
+ leaf->entry = prev->latch->entry;
+ leaf->nounlock = 1;
leaf->type = 2;
- // see if right sibling is not yet in the FIFO
-
- if( !bt_parentmatch (head, leaf->entry) )
- bt_lockpage(BtLockParent, set->latch);
-
- bt_unlockpage(BtLockWrite, set->latch);
-
if( tail )
tail->next = leaf;
else
tail = leaf;
- // fix master's far right sibling's left pointer
-
- if( right = bt_getid (set->page->right) ) {
- if( set->latch = bt_pinlatch (bt, right, 1) )
- set->page = bt_mappage (bt, set->latch);
+ // leave atomic lock in place until
+ // deletion completes in next phase.
- bt_lockpage (BtLockWrite, set->latch);
- bt_putid (set->page->left, prev->latch->page_no);
- set->latch->dirty = 1;
-
- bt_unlockpage (BtLockWrite, set->latch);
- bt_unpinlatch (set->latch);
- }
-
- bt_unlockpage(BtLockAtomic, latch);
+ bt_unlockpage(bt, BtLockWrite, prev->latch);
}
// add & delete keys for any pages split or merged during transaction
break;
- case 2: // insert key & free
- if( bt_insertkey (bt, ptr->key, ptr->len, 1, value, BtId, 1) )
+ case 2: // free page
+ if( bt_atomicfree (bt, set) )
return -1;
- bt_lockpage (BtLockDelete, set->latch);
- bt_lockpage (BtLockWrite, set->latch);
- bt_freepage (bt, set);
break;
}
if( !leaf->nounlock )
- bt_unlockpage (BtLockParent, set->latch);
+ bt_unlockpage (bt, BtLockParent, set->latch);
bt_unpinlatch (set->latch);
tail = leaf->next;
{
uid page_no = bt_getid (bt->mgr->pagezero->alloc->left);
BtPageSet set[1];
-uint slot;
if( set->latch = bt_pinlatch (bt, page_no, 1) )
set->page = bt_mappage (bt, set->latch);
else
return 0;
- bt_lockpage(BtLockRead, set->latch);
-
+ bt_lockpage(bt, BtLockRead, set->latch);
memcpy (bt->cursor, set->page, bt->mgr->page_size);
- slot = set->page->cnt;
-
- bt_unlockpage(BtLockRead, set->latch);
+ bt_unlockpage(bt, BtLockRead, set->latch);
bt_unpinlatch (set->latch);
- return slot;
+
+ bt->cursor_page = page_no;
+ return bt->cursor->cnt;
}
// return previous slot on cursor page
else
return 0;
- bt_lockpage(BtLockRead, set->latch);
+ bt_lockpage(bt, BtLockRead, set->latch);
memcpy (bt->cursor, set->page, bt->mgr->page_size);
- bt_unlockpage(BtLockRead, set->latch);
+ bt_unlockpage(bt, BtLockRead, set->latch);
bt_unpinlatch (set->latch);
next = bt_getid (bt->cursor->right);
+ if( bt->cursor->kill )
+ goto findourself;
+
if( next != us )
if( next == ourright )
goto goleft;
else
return 0;
- bt_lockpage(BtLockRead, set->latch);
+ bt_lockpage(bt, BtLockRead, set->latch);
memcpy (bt->cursor, set->page, bt->mgr->page_size);
- bt_unlockpage(BtLockRead, set->latch);
+ bt_unlockpage(bt, BtLockRead, set->latch);
bt_unpinlatch (set->latch);
slot = 0;
bt->cursor_page = set->latch->page_no;
- bt_unlockpage(BtLockRead, set->latch);
+ bt_unlockpage(bt, BtLockRead, set->latch);
bt_unpinlatch (set->latch);
return slot;
}
fprintf(stderr, "latchset %d accesslocked for page %.8x\n", slot, latch->page_no);
memset ((ushort *)latch->access, 0, sizeof(RWLock));
- if( *latch->parent->rin & MASK )
+ if( *latch->parent->ticket != *latch->parent->serving )
fprintf(stderr, "latchset %d parentlocked for page %.8x\n", slot, latch->page_no);
memset ((ushort *)latch->parent, 0, sizeof(RWLock));
fprintf(stderr, "latchset %d accesslocked for page %.8x\n", idx, latch->page_no);
memset ((ushort *)latch->access, 0, sizeof(RWLock));
- if( *latch->parent->rin & MASK )
+ if( *latch->parent->ticket != *latch->parent->serving )
fprintf(stderr, "latchset %d parentlocked for page %.8x\n", idx, latch->page_no);
memset ((ushort *)latch->parent, 0, sizeof(RWLock));
uint __stdcall index_file (void *arg)
#endif
{
-int line = 0, found = 0, cnt = 0, unique;
+int line = 0, found = 0, cnt = 0, idx;
uid next, page_no = LEAF_page; // start on first page of leaves
+int ch, len = 0, slot, type = 0;
unsigned char key[BT_maxkey];
unsigned char txn[65536];
ThreadArg *args = arg;
-int ch, len = 0, slot;
BtPageSet set[1];
uint nxt = 65536;
BtPage page;
bt = bt_open (args->mgr);
page = (BtPage)txn;
- unique = (args->type[1] | 0x20) != 'd';
+ if( args->idx < strlen (args->type) )
+ ch = args->type[args->idx];
+ else
+ ch = args->type[strlen(args->type) - 1];
- switch(args->type[0] | 0x20)
+ switch(ch | 0x20)
{
case 'a':
fprintf(stderr, "started latch mgr audit\n");
fprintf(stderr, "finished latch mgr audit, found %d keys\n", cnt);
break;
- case 't':
- fprintf(stderr, "started TXN pennysort for %s\n", args->infile);
+ case 'd':
+ type = Delete;
+
+ case 'p':
+ if( !type )
+ type = Unique;
+
+ if( args->num )
+ if( type == Delete )
+ fprintf(stderr, "started TXN pennysort delete for %s\n", args->infile);
+ else
+ fprintf(stderr, "started TXN pennysort insert for %s\n", args->infile);
+ else
+ if( type == Delete )
+ fprintf(stderr, "started pennysort delete for %s\n", args->infile);
+ else
+ fprintf(stderr, "started pennysort insert for %s\n", args->infile);
+
if( in = fopen (args->infile, "rb") )
while( ch = getc(in), ch != EOF )
if( ch == '\n' )
{
line++;
+
+ if( !args->num ) {
+ if( bt_insertkey (bt, key, 10, 0, key + 10, len - 10, 1) )
+ fprintf(stderr, "Error %d Line: %d\n", bt->err, line), exit(0);
+ len = 0;
+ continue;
+ }
+
nxt -= len - 10;
memcpy (txn + nxt, key + 10, len - 10);
nxt -= 1;
nxt -= 1;
txn[nxt] = 10;
slotptr(page,++cnt)->off = nxt;
- slotptr(page,cnt)->type = Delete;
+ slotptr(page,cnt)->type = type;
len = 0;
if( cnt < args->num )
if( bt_atomictxn (bt, page) )
fprintf(stderr, "Error %d Line: %d\n", bt->err, line), exit(0);
- nxt = 65536;
+ nxt = sizeof(txn);
cnt = 0;
}
else if( len < BT_maxkey )
key[len++] = ch;
- fprintf(stderr, "finished %s for %d keys: %d reads %d writes\n", args->infile, line, bt->reads, bt->writes);
- break;
-
- case 'p':
- fprintf(stderr, "started pennysort for %s\n", args->infile);
- if( in = fopen (args->infile, "rb") )
- while( ch = getc(in), ch != EOF )
- if( ch == '\n' )
- {
- line++;
-
- if( bt_insertkey (bt, key, 10, 0, key + 10, len - 10, unique) )
- fprintf(stderr, "Error %d Line: %d\n", bt->err, line), exit(0);
- len = 0;
- }
- else if( len < BT_maxkey )
- key[len++] = ch;
- fprintf(stderr, "finished %s for %d keys: %d reads %d writes\n", args->infile, line, bt->reads, bt->writes);
+ fprintf(stderr, "finished %s for %d keys: %d reads %d writes %d found\n", args->infile, line, bt->reads, bt->writes, bt->found);
break;
case 'w':
{
line++;
- if( args->num == 1 )
- sprintf((char *)key+len, "%.9d", 1000000000 - line), len += 9;
-
- else if( args->num )
- sprintf((char *)key+len, "%.9d", line + args->idx * args->num), len += 9;
-
- if( bt_insertkey (bt, key, len, 0, NULL, 0, unique) )
+ if( bt_insertkey (bt, key, len, 0, NULL, 0, 1) )
fprintf(stderr, "Error %d Line: %d\n", bt->err, line), exit(0);
len = 0;
}
fprintf(stderr, "finished %s for %d keys: %d reads %d writes\n", args->infile, line, bt->reads, bt->writes);
break;
- case 'd':
- fprintf(stderr, "started deleting keys for %s\n", args->infile);
- if( in = fopen (args->infile, "rb") )
- while( ch = getc(in), ch != EOF )
- if( ch == '\n' )
- {
- line++;
- if( args->num == 1 )
- sprintf((char *)key+len, "%.9d", 1000000000 - line), len += 9;
-
- else if( args->num )
- sprintf((char *)key+len, "%.9d", line + args->idx * args->num), len += 9;
-
- if( bt_findkey (bt, key, len, NULL, 0) < 0 )
- fprintf(stderr, "Cannot find key for Line: %d\n", line), exit(0);
- ptr = (BtKey*)(bt->key);
- found++;
-
- if( bt_deletekey (bt, ptr->key, ptr->len, 0) )
- fprintf(stderr, "Error %d Line: %d\n", bt->err, line), exit(0);
- len = 0;
- }
- else if( len < BT_maxkey )
- key[len++] = ch;
- fprintf(stderr, "finished %s for %d keys, %d found: %d reads %d writes\n", args->infile, line, found, bt->reads, bt->writes);
- break;
-
case 'f':
fprintf(stderr, "started finding keys for %s\n", args->infile);
if( in = fopen (args->infile, "rb") )
if( ch == '\n' )
{
line++;
- if( args->num == 1 )
- sprintf((char *)key+len, "%.9d", 1000000000 - line), len += 9;
-
- else if( args->num )
- sprintf((char *)key+len, "%.9d", line + args->idx * args->num), len += 9;
-
if( bt_findkey (bt, key, len, NULL, 0) == 0 )
found++;
else if( bt->err )
set->page = bt_mappage (bt, set->latch);
else
fprintf(stderr, "unable to obtain latch"), exit(1);
- bt_lockpage (BtLockRead, set->latch);
+ bt_lockpage (bt, BtLockRead, set->latch);
next = bt_getid (set->page->right);
for( slot = 0; slot++ < set->page->cnt; )
cnt++;
}
- bt_unlockpage (BtLockRead, set->latch);
+ bt_unlockpage (bt, BtLockRead, set->latch);
bt_unpinlatch (set->latch);
} while( page_no = next );
fprintf(stderr, " Total keys read %d: %d reads, %d writes\n", cnt, bt->reads, bt->writes);
break;
+ case 'r':
+ fprintf(stderr, "started reverse scan\n");
+ if( slot = bt_lastkey (bt) )
+ while( slot = bt_prevkey (bt, slot) ) {
+ if( slotptr(bt->cursor, slot)->dead )
+ continue;
+
+ ptr = keyptr(bt->cursor, slot);
+ len = ptr->len;
+
+ if( slotptr(bt->cursor, slot)->type == Duplicate )
+ len -= BtId;
+
+ fwrite (ptr->key, len, 1, stdout);
+ val = valptr(bt->cursor, slot);
+ fwrite (val->value, val->len, 1, stdout);
+ fputc ('\n', stdout);
+ cnt++;
+ }
+
+ fprintf(stderr, " Total keys read %d: %d reads, %d writes\n", cnt, bt->reads, bt->writes);
+ break;
+
case 'c':
#ifdef unix
posix_fadvise( bt->mgr->idx, 0, 0, POSIX_FADV_SEQUENTIAL);
}
cnt--; // remove stopper key
- fprintf(stderr, " Total keys read %d: %d reads, %d writes\n", cnt, bt->reads, bt->writes);
+ fprintf(stderr, " Total keys counted %d: %d reads, %d writes\n", cnt, bt->reads, bt->writes);
break;
}
BtDb *bt;
if( argc < 3 ) {
- fprintf (stderr, "Usage: %s idx_file Read/Write/Scan/Delete/Find [page_bits buffer_pool_size line_numbers src_file1 src_file2 ... ]\n", argv[0]);
- fprintf (stderr, " where page_bits is the page size in bits\n");
+ fprintf (stderr, "Usage: %s idx_file cmds [page_bits buffer_pool_size txn_size src_file1 src_file2 ... ]\n", argv[0]);
+ fprintf (stderr, " where idx_file is the name of the btree file\n");
+ fprintf (stderr, " cmds is a string of (c)ount/(r)ev scan/(w)rite/(s)can/(d)elete/(f)ind/(p)ennysort, with one character command for each input src_file. Commands with no input file need a placeholder.\n");
+ fprintf (stderr, " page_bits is the page size in bits\n");
fprintf (stderr, " buffer_pool_size is the number of pages in buffer pool\n");
- fprintf (stderr, " line_numbers = 1 to append line numbers to keys\n");
+ fprintf (stderr, " txn_size = n to block transactions into n units, or zero for no transactions\n");
fprintf (stderr, " src_file1 thru src_filen are files of keys separated by newline\n");
exit(0);
}