// share is count of read accessors
// grant write lock when share == 0
-typedef struct {
- volatile unsigned char mutex;
- volatile unsigned char exclusive:1;
- volatile unsigned char pending:1;
- volatile ushort share;
+// spin latch; volatile now qualifies the whole type instead of each member
+volatile typedef struct {
+ unsigned char mutex[1]; // test-and-set byte taken before the other fields are touched
+ unsigned char exclusive:1; // set when write access is taken, cleared on write release
+ unsigned char pending:1; // set when a lock request finds share or exclusive nonzero
+ ushort share; // count of read accessors
} BtSpinLatch;
// hash table entries
uint min; // next key offset
unsigned char bits:7; // page size in bits
unsigned char free:1; // page is on free list
- unsigned char lvl:5; // level of page
+ unsigned char lvl:6; // level of page
unsigned char kill:1; // page is being killed
unsigned char dirty:1; // page has deleted keys
- unsigned char posted:1; // page fence is posted
unsigned char right[BtId]; // page number to right
} *BtPage;
do {
// obtain latch mutex
#ifdef unix
- if( __sync_lock_test_and_set(&latch->mutex, 1) )
+ if( __sync_lock_test_and_set(latch->mutex, 1) )
continue;
#else
- if( _InterlockedExchange8(&latch->mutex, 1) )
+ if( _InterlockedExchange8(latch->mutex, 1) )
continue;
#endif
// see if exclusive request is granted or pending
latch->share++;
#ifdef unix
- __sync_lock_release (&latch->mutex);
+ __sync_lock_release (latch->mutex);
#else
- _InterlockedExchange8(&latch->mutex, 0);
+ _InterlockedExchange8(latch->mutex, 0);
#endif
if( prev )
do {
#ifdef unix
- if( __sync_lock_test_and_set(&latch->mutex, 1) )
+ if( __sync_lock_test_and_set(latch->mutex, 1) )
continue;
#else
- if( _InterlockedExchange8(&latch->mutex, 1) )
+ if( _InterlockedExchange8(latch->mutex, 1) )
continue;
#endif
if( prev = !(latch->share | latch->exclusive) )
else
latch->pending = 1;
#ifdef unix
- __sync_lock_release (&latch->mutex);
+ __sync_lock_release (latch->mutex);
#else
- _InterlockedExchange8(&latch->mutex, 0);
+ _InterlockedExchange8(latch->mutex, 0);
#endif
if( prev )
return;
uint prev;
#ifdef unix
- if( __sync_lock_test_and_set(&latch->mutex, 1) )
+ if( __sync_lock_test_and_set(latch->mutex, 1) )
return 0;
#else
- if( _InterlockedExchange8(&latch->mutex, 1) )
+ if( _InterlockedExchange8(latch->mutex, 1) )
return 0;
#endif
// take write access if all bits are clear
latch->exclusive = 1;
#ifdef unix
- __sync_lock_release (&latch->mutex);
+ __sync_lock_release (latch->mutex);
#else
- _InterlockedExchange8(&latch->mutex, 0);
+ _InterlockedExchange8(latch->mutex, 0);
#endif
return prev;
}
{
// obtain latch mutex
#ifdef unix
- while( __sync_lock_test_and_set(&latch->mutex, 1) )
+ while( __sync_lock_test_and_set(latch->mutex, 1) )
sched_yield();
#else
- while( _InterlockedExchange8(&latch->mutex, 1) )
+ while( _InterlockedExchange8(latch->mutex, 1) )
SwitchToThread();
#endif
latch->exclusive = 0;
#ifdef unix
- __sync_lock_release (&latch->mutex);
+ __sync_lock_release (latch->mutex);
#else
- _InterlockedExchange8(&latch->mutex, 0);
+ _InterlockedExchange8(latch->mutex, 0);
#endif
}
+// release one read (share) hold on a spin latch
void bt_spinreleaseread(BtSpinLatch *latch)
{
+ // obtain latch mutex, yielding the processor while another thread holds it
#ifdef unix
- while( __sync_lock_test_and_set(&latch->mutex, 1) )
+ while( __sync_lock_test_and_set(latch->mutex, 1) )
sched_yield();
#else
- while( _InterlockedExchange8(&latch->mutex, 1) )
+ while( _InterlockedExchange8(latch->mutex, 1) )
SwitchToThread();
#endif
+ // one fewer read accessor
latch->share--;
+ // release latch mutex
#ifdef unix
- __sync_lock_release (&latch->mutex);
+ __sync_lock_release (latch->mutex);
#else
- _InterlockedExchange8(&latch->mutex, 0);
+ _InterlockedExchange8(latch->mutex, 0);
#endif
}
ushort slot, avail = 0, victim, idx;
BtLatchSet *set;
+ // try to find existing latch table entry for this page
+
// obtain read lock on hash table entry
bt_spinreadlock(bt->mgr->latchmgr->table[hashidx].latch);
close (mgr->idx);
free (mgr->pool);
free (mgr->hash);
- free (mgr->latch);
+ free ((void *)mgr->latch);
free (mgr);
#else
FlushFileBuffers(mgr->idx);
CloseHandle(mgr->idx);
GlobalFree (mgr->pool);
GlobalFree (mgr->hash);
- GlobalFree (mgr->latch);
+ GlobalFree ((void *)mgr->latch);
GlobalFree (mgr);
#endif
}
void bt_close (BtDb *bt)
{
#ifdef unix
- if ( bt->mem )
+ if( bt->mem )
free (bt->mem);
#else
- if ( bt->mem)
+ if( bt->mem)
VirtualFree (bt->mem, 0, MEM_RELEASE);
#endif
free (bt);
return pool;
}
- // upgrade to write lock
+ // upgrade to write lock
bt_spinreleaseread (&bt->mgr->latch[idx]);
bt_spinwritelock (&bt->mgr->latch[idx]);
reuse = 0;
}
#ifdef unix
- if ( pwrite(bt->mgr->idx, page, bt->mgr->page_size, new_page << bt->mgr->page_bits) < bt->mgr->page_size )
+ if( pwrite(bt->mgr->idx, page, bt->mgr->page_size, new_page << bt->mgr->page_bits) < bt->mgr->page_size )
return bt->err = BTERR_wrt, 0;
// if writing first page of pool block, zero last page in the block
- if ( !reuse && bt->mgr->poolmask > 0 && (new_page & bt->mgr->poolmask) == 0 )
+ if( !reuse && bt->mgr->poolmask > 0 && (new_page & bt->mgr->poolmask) == 0 )
{
// use zero buffer to write zeros
- if ( pwrite(bt->mgr->idx,bt->zero, bt->mgr->page_size, (new_page | bt->mgr->poolmask) << bt->mgr->page_bits) < bt->mgr->page_size )
+ if( pwrite(bt->mgr->idx,bt->zero, bt->mgr->page_size, (new_page | bt->mgr->poolmask) << bt->mgr->page_bits) < bt->mgr->page_size )
return bt->err = BTERR_wrt, 0;
}
#else
// re-read and re-lock root after determining actual level of root
if( set->page->lvl != drill) {
- if ( set->page_no != ROOT_page )
+ if( set->page_no != ROOT_page )
return bt->err = BTERR_struct, 0;
drill = set->page->lvl;
BTERR bt_splitpage (BtDb *bt, BtPageSet *set)
{
uint cnt = 0, idx = 0, max, nxt = bt->mgr->page_size;
-unsigned char fencekey[256];
+unsigned char fencekey[256], rightkey[256];
uint lvl = set->page->lvl;
+BtPageSet right[1];
uint prev;
-uid right;
BtKey key;
// split higher half of keys to bt->frame
slotptr(bt->frame, idx)->off = nxt;
}
+ // remember existing fence key for new page to the right
+
+ memcpy (rightkey, key, key->len + 1);
+
bt->frame->bits = bt->mgr->page_bits;
bt->frame->min = nxt;
bt->frame->cnt = idx;
// get new free page and write higher keys to it.
- if( !(right = bt_newpage(bt, bt->frame)) )
+ if( !(right->page_no = bt_newpage(bt, bt->frame)) )
return bt->err;
// update lower keys to continue in old page
memcpy (bt->frame, set->page, bt->mgr->page_size);
memset (set->page+1, 0, bt->mgr->page_size - sizeof(*set->page));
nxt = bt->mgr->page_size;
- set->page->posted = 0;
set->page->dirty = 0;
set->page->act = 0;
cnt = 0;
// remember fence key for smaller page
memcpy(fencekey, key, key->len + 1);
- bt_putid(set->page->right, right);
+
+ bt_putid(set->page->right, right->page_no);
set->page->min = nxt;
set->page->cnt = idx;
// if current page is the root page, split it
if( set->page_no == ROOT_page )
- return bt_splitroot (bt, set, fencekey, right);
-
- right = 0;
+ return bt_splitroot (bt, set, fencekey, right->page_no);
// insert new fences in their parent pages
- while( 1 ) {
- bt_lockpage (BtLockParent, set->latch);
-
- key = keyptr (set->page, set->page->cnt);
- memcpy (fencekey, key, key->len + 1);
- prev = set->page->posted;
-
- if( right && prev ) {
- bt_unlockpage (BtLockParent, set->latch);
- bt_unlockpage (BtLockWrite, set->latch);
- bt_unpinlatch (set->latch);
- bt_unpinpool (set->pool);
- return 0;
- }
-
- right = bt_getid (set->page->right);
- set->page->posted = 1;
-
- bt_unlockpage (BtLockWrite, set->latch);
+ right->latch = bt_pinlatch (bt, right->page_no);
+ bt_lockpage (BtLockParent, right->latch);
- // insert new fence for reformulated left block of smaller keys
+ bt_lockpage (BtLockParent, set->latch);
+ bt_unlockpage (BtLockWrite, set->latch);
- if( !prev )
- if( bt_insertkey (bt, fencekey+1, *fencekey, lvl+1, set->page_no, time(NULL)) )
- return bt->err;
+ // insert new fence for reformulated left block of smaller keys
- bt_unlockpage (BtLockParent, set->latch);
- bt_unpinlatch (set->latch);
- bt_unpinpool (set->pool);
-
- if( !(set->page_no = right) )
- break;
+ if( bt_insertkey (bt, fencekey+1, *fencekey, lvl+1, set->page_no, time(NULL)) )
+ return bt->err;
- set->latch = bt_pinlatch (bt, right);
+ // switch fence for right block of larger keys to new right page
- if( set->pool = bt_pinpool (bt, right) )
- set->page = bt_page (bt, set->pool, right);
- else
- return bt->err;
+ if( bt_insertkey (bt, rightkey+1, *rightkey, lvl+1, right->page_no, time(NULL)) )
+ return bt->err;
- bt_lockpage (BtLockWrite, set->latch);
- }
+ bt_unlockpage (BtLockParent, set->latch);
+ bt_unpinlatch (set->latch);
+ bt_unpinpool (set->pool);
+ bt_unlockpage (BtLockParent, right->latch);
+ bt_unpinlatch (right->latch);
return 0;
}
ptr = keyptr(set->page, slot);
else
{
- if ( !bt->err )
+ if( !bt->err )
bt->err = BTERR_ovflw;
return bt->err;
}
#ifdef STANDALONE
+#ifndef unix
+// convert a FILETIME (a count of 100-nanosecond intervals) to seconds.
+// the two halves are copied into a ULARGE_INTEGER so the full 64 bit
+// count is used, instead of being broken into calendar fields.
+static double fileTimeSecs (const FILETIME *ft)
+{
+ULARGE_INTEGER ticks[1];
+
+	ticks->LowPart = ft->dwLowDateTime;
+	ticks->HighPart = ft->dwHighDateTime;
+	return (double)ticks->QuadPart / 10000000.0;
+}
+
+// return a time reading in seconds
+//	type 0: wall clock time; only the difference between
+//		two readings is meaningful
+//	type 1: user mode CPU time used by the current process
+//	type 2: kernel mode CPU time used by the current process
+// any other type, or a failed GetProcessTimes call, returns 0.
+//
+// the earlier SYSTEMTIME decomposition dropped everything above
+// wHour for types 1 and 2, and wrapped at the end of the week
+// for type 0; working from the raw tick count avoids both.
+double getCpuTime(int type)
+{
+FILETIME crtime[1];
+FILETIME xittime[1];
+FILETIME systime[1];
+FILETIME usrtime[1];
+
+	switch( type ) {
+	case 0:
+		GetSystemTimeAsFileTime (xittime);
+		return fileTimeSecs (xittime);
+	case 1:
+		if( GetProcessTimes (GetCurrentProcess(), crtime, xittime, systime, usrtime) )
+			return fileTimeSecs (usrtime);
+		break;
+	case 2:
+		if( GetProcessTimes (GetCurrentProcess(), crtime, xittime, systime, usrtime) )
+			return fileTimeSecs (systime);
+		break;
+	}
+
+	return 0;
+}
+#else
+#include <time.h>
+#include <sys/resource.h>
+
+// return a time reading in seconds
+//	type 0: gettimeofday reading
+//	type 1: ru_utime reported by getrusage for RUSAGE_SELF
+//	type 2: ru_stime reported by getrusage for RUSAGE_SELF
+// any other type returns 0.
+double getCpuTime(int type)
+{
+struct rusage usage[1];
+struct timeval now[1];
+const struct timeval *src;
+
+	if( type == 0 ) {
+		gettimeofday(now, NULL);
+		src = now;
+	} else if( type == 1 || type == 2 ) {
+		getrusage(RUSAGE_SELF, usage);
+		src = type == 1 ? &usage->ru_utime : &usage->ru_stime;
+	} else
+		return 0;
+
+	// whole seconds plus the microsecond remainder
+	return (double)src->tv_sec + (double)src->tv_usec / 1000000;
+}
+#endif
+
void bt_latchaudit (BtDb *bt)
{
ushort idx, hashidx;
{
int idx, cnt, len, slot, err;
int segsize, bits = 16;
+double start, stop;
#ifdef unix
pthread_t *threads;
-timer start, stop;
#else
-time_t start[1], stop[1];
HANDLE *threads;
#endif
-double real_time;
ThreadArg *args;
uint poolsize = 0;
+float elapsed;
int num = 0;
char key[1];
BtMgr *mgr;
exit(0);
}
-#ifdef unix
- gettimeofday(&start, NULL);
-#else
- time(start);
-#endif
+ start = getCpuTime(0);
if( argc > 3 )
bits = atoi(argv[3]);
#ifdef unix
for( idx = 0; idx < cnt; idx++ )
pthread_join (threads[idx], NULL);
- gettimeofday(&stop, NULL);
- real_time = 1000.0 * ( stop.tv_sec - start.tv_sec ) + 0.001 * (stop.tv_usec - start.tv_usec );
#else
WaitForMultipleObjects (cnt, threads, TRUE, INFINITE);
for( idx = 0; idx < cnt; idx++ )
CloseHandle(threads[idx]);
- time (stop);
- real_time = 1000 * (*stop - *start);
#endif
- fprintf(stderr, " Time to complete: %.2f seconds\n", real_time/1000);
+ elapsed = getCpuTime(0) - start;
+ fprintf(stderr, " real %dm%.3fs\n", (int)(elapsed/60), elapsed - (int)(elapsed/60)*60);
+ elapsed = getCpuTime(1);
+ fprintf(stderr, " user %dm%.3fs\n", (int)(elapsed/60), elapsed - (int)(elapsed/60)*60);
+ elapsed = getCpuTime(2);
+ fprintf(stderr, " sys %dm%.3fs\n", (int)(elapsed/60), elapsed - (int)(elapsed/60)*60);
+
bt_mgrclose (mgr);
}