#include "lfds600_abstraction_internal.h" /****************************************************************************/ #if (defined _WIN64 && defined _MSC_VER) /* TRD : 64 bit Windows (user-mode or kernel) on any CPU with the Microsoft C compiler _WIN64 indicates 64 bit Windows _MSC_VER indicates Microsoft C compiler */ LFDS600_INLINE unsigned char lfds600_abstraction_dcas( volatile lfds600_atom_t *destination, lfds600_atom_t *exchange, lfds600_atom_t *compare ) { unsigned char cas_result; assert( destination != NULL ); assert( exchange != NULL ); assert( compare != NULL ); cas_result = _InterlockedCompareExchange128( (volatile __int64 *) destination, (__int64) *(exchange+1), (__int64) *exchange, (__int64 *) compare ); return( cas_result ); } #endif /****************************************************************************/ #if (!defined _WIN64 && defined _WIN32 && defined _MSC_VER) /* TRD : 32 bit Windows (user-mode or kernel) on any CPU with the Microsoft C compiler (!defined _WIN64 && defined _WIN32) indicates 32 bit Windows _MSC_VER indicates Microsoft C compiler */ LFDS600_INLINE unsigned char lfds600_abstraction_dcas( volatile lfds600_atom_t *destination, lfds600_atom_t *exchange, lfds600_atom_t *compare ) { __int64 original_compare; assert( destination != NULL ); assert( exchange != NULL ); assert( compare != NULL ); *(__int64 *) &original_compare = *(__int64 *) compare; *(__int64 *) compare = _InterlockedCompareExchange64( (volatile __int64 *) destination, *(__int64 *) exchange, *(__int64 *) compare ); return( (unsigned char) (*(__int64 *) compare == *(__int64 *) &original_compare) ); } #endif /****************************************************************************/ #if (defined __x86_64__ && __GNUC__ && !defined __pic__) /* TRD : any OS on x64 with GCC for statically linked code __x86_64__ indicates x64 __GNUC__ indicates GCC */ LFDS600_INLINE unsigned char lfds600_abstraction_dcas( volatile lfds600_atom_t *destination, lfds600_atom_t *exchange, lfds600_atom_t *compare ) { unsigned char cas_result; assert( destination != NULL ); assert( exchange != NULL ); assert( compare != NULL ); __asm__ __volatile__ ( "lock;" // make cmpxchg16b atomic "cmpxchg16b %0;" // cmpxchg16b sets ZF on success "setz %3;" // if ZF set, set cas_result to 1 // output : "+m" (*(volatile lfds600_atom_t (*)[2]) destination), "+a" (*compare), "+d" (*(compare+1)), "=q" (cas_result) // input : "b" (*exchange), "c" (*(exchange+1)) // clobbered : "cc", "memory" ); return( cas_result ); } #endif /****************************************************************************/ #if (defined __i686__ && __GNUC__ && !defined __pic__) /* TRD : any OS on x86 with GCC for statically linked code __i686__ indicates x86 __GNUC__ indicates GCC */ LFDS600_INLINE unsigned char lfds600_abstraction_dcas( volatile lfds600_atom_t *destination, lfds600_atom_t *exchange, lfds600_atom_t *compare ) { unsigned char cas_result; assert( destination != NULL ); assert( exchange != NULL ); assert( compare != NULL ); __asm__ __volatile__ ( "lock;" // make cmpxchg8b atomic "cmpxchg8b %0;" // cmpxchg8b sets ZF on success "setz %3;" // if ZF set, set cas_result to 1 // output : "+m" (*(volatile lfds600_atom_t (*)[2]) destination), "+a" (*compare), "+d" (*(compare+1)), "=q" (cas_result) // input : "b" (*exchange), "c" (*(exchange+1)) // clobbered : "cc", "memory" ); return( cas_result ); } #endif /****************************************************************************/ #if (defined __x86_64__ && __GNUC__ && defined __pic__) /* TRD : any OS on x64 with GCC for position independent code (e.g. a shared object) __x86_64__ indicates x64 __GNUC__ indicates GCC */ LFDS600_INLINE unsigned char lfds600_abstraction_dcas( volatile lfds600_atom_t *destination, lfds600_atom_t *exchange, lfds600_atom_t *compare ) { unsigned char cas_result; assert( destination != NULL ); assert( exchange != NULL ); assert( compare != NULL ); /* TRD : with a shared object, we cannot clobber RBX as such, we borrow RSI - we load half of the exchange value into it then swap it with RBX then do the compare-and-swap then swap the original value of RBX back from RSI */ __asm__ __volatile__ ( "xchg %%rsi, %%rbx;" // swap RBI and RBX "lock;" // make cmpxchg16b atomic "cmpxchg16b %0;" // cmpxchg16b sets ZF on success "setz %3;" // if ZF set, set cas_result to 1 "xchg %%rbx, %%rsi;" // re-swap RBI and RBX // output : "+m" (*(volatile lfds600_atom_t (*)[2]) destination), "+a" (*compare), "+d" (*(compare+1)), "=q" (cas_result) // input : "S" (*exchange), "c" (*(exchange+1)) // clobbered : "cc", "memory" ); return( cas_result ); } #endif /****************************************************************************/ #if (defined __i686__ && __GNUC__ && defined __pic__) /* TRD : any OS on x86 with GCC for position independent code (e.g. a shared object) __i686__ indicates x86 __GNUC__ indicates GCC */ LFDS600_INLINE unsigned char lfds600_abstraction_dcas( volatile lfds600_atom_t *destination, lfds600_atom_t *exchange, lfds600_atom_t *compare ) { unsigned char cas_result; assert( destination != NULL ); assert( exchange != NULL ); assert( compare != NULL ); /* TRD : with a shared object, we cannot clobber EBX as such, we borrow ESI - we load half of the exchange value into it then swap it with EBX then do the compare-and-swap then swap the original value of EBX back from ESI */ __asm__ __volatile__ ( "xchg %%esi, %%ebx;" // swap EBI and EBX "lock;" // make cmpxchg8b atomic "cmpxchg8b %0;" // cmpxchg8b sets ZF on success "setz %3;" // if ZF set, set cas_result to 1 "xchg %%ebx, %%esi;" // re-swap EBI and EBX // output : "+m" (*(volatile lfds600_atom_t (*)[2]) destination), "+a" (*compare), "+d" (*(compare+1)), "=q" (cas_result) // input : "S" (*exchange), "c" (*(exchange+1)) // clobbered : "cc", "memory" ); return( cas_result ); } #endif /****************************************************************************/ #if (defined __arm__ && __GNUC__) /* TRD : any OS on any ARM with GCC Remember however we need to set into compare the original value of destination. __arm__ indicates ARM __GNUC__ indicates GCC */ LFDS600_INLINE unsigned char lfds600_abstraction_dcas( volatile lfds600_atom_t *destination, lfds600_atom_t *exchange, lfds600_atom_t *compare ) { lfds600_atom_t *local_compare = compare, stored_flag = 1; register lfds600_atom_t local_exchange_a __asm("r2"), local_exchange_b __asm("r3"), local_compare_a __asm("r4"), local_compare_b __asm("r5"), original_destination_a __asm("r6"), original_destination_b __asm("r7"); assert( destination != NULL ); assert( exchange != NULL ); assert( compare != NULL ); /* TRD : some notes the double word ldr and str instructions require contigous registers where the first register is an even number honouring this requirement requires us to specifically specify the registers to use (which is why we're using register __asm("rN") in the declerations above the arguments to the function occupy registers r0, r1 and r2 we can use up to and including r8, but r9 can have a frame pointer in it so we make a copy of compare (freeing up r2, so we can use it for a double word load) but use destination (r0) and exchange (r1) directly note LDRD and STRD became available in armv6k apologies for the trickery with the mcr register variable - the code runs out of registers on armv6k */ __asm__ __volatile__ ( " mov %[stored_flag], #1;" // put 1 into stored_flag " mov %[local_exchange_a], #0;" // borrow local_exchange_a for mcr, to save a register " mcr p15, 0, %[local_exchange_a], c7, c10, 5;" // memory barrier (ARM v6 compatible) " ldrd %[local_exchange_a], %[local_exchange_b], [%[exchange]];" // load exchange into local_exchange_a and local_exchange_b (which are r2 and r3, respectively) " ldrd %[local_compare_a], %[local_compare_b], [%[local_compare]];" // load compare into local_compare_a and local_compare_b (which are r4 and r5, respectively) "atomic_dcas:;" " ldrexd %[original_destination_a], %[original_destination_b], [%[destination]];" // load destination into original_destination_a and original_destination_b (which are r6 and r7, respectively) " teq %[original_destination_a], %[local_compare_a];" // compare the first word of destination with the first word of compare " teqeq %[original_destination_b], %[local_compare_b];" // if they're equal, compare the second word of destination with the second word of compare " bne exit;" // if either word of destination does not match its respective word of compare, exit " strexd %[stored_flag], %[local_exchange_a], %[local_exchange_b], [%[destination]];" // if both words were equal, try to store local_exchange_a and local_exchange_b into *destination (on success, strexed puts 0 into stored_flag) " teq %[stored_flag], #0;" // check if stored_flag is 0 " bne atomic_dcas;" // if not 0, retry (someone else touched *destination after we loaded but before we stored) "exit:;" " strd %[original_destination_a], %[original_destination_b], [%[local_compare]];" // whether or not the CAS swapped, we always write the original value of destination into *compare " mov %[local_exchange_a], #0;" // borrow local_exchange_a for mcr, to save a register " mcr p15, 0, %[local_exchange_a], c7, c10, 5;" // memory barrier (ARM v6 compatible) // output : "+m" (*(volatile lfds600_atom_t (*)[2]) destination), "+m" (*(lfds600_atom_t (*)[2]) local_compare), [stored_flag] "+&r" (stored_flag), [original_destination_a] "+&r" (original_destination_a), [original_destination_b] "+&r" (original_destination_b), [local_compare_a] "+&r" (local_compare_a), [local_compare_b] "+&r" (local_compare_b), [local_exchange_a] "+&r" (local_exchange_a), [local_exchange_b] "+&r" (local_exchange_b) // input : "m" (*(lfds600_atom_t (*)[2]) exchange), [destination] "r" (destination), [local_compare] "r" (local_compare), [exchange] "r" (exchange) // clobbered : "cc", "memory" // memory is clobbered because we issue a memory barrier ); /* TRD : stored_flag is set to 0 on store, 1 on fail we need to return 1 on success, 0 on fail */ return( (unsigned char) !stored_flag ); } #endif