1 #include "lfds601_abstraction_internal.h"
\r
7 /****************************************************************************/
\r
8 #if (defined _WIN64 && defined _MSC_VER)
\r
10 /* TRD : 64 bit Windows (user-mode or kernel) on any CPU with the Microsoft C compiler
\r
12 _WIN64 indicates 64 bit Windows
\r
13 _MSC_VER indicates Microsoft C compiler
\r
// Double-word compare-and-swap on the pair of atoms at destination, built on the
// MSVC _InterlockedCompareExchange128 intrinsic.
//   destination : the two contiguous atoms to update (asserted non-NULL)
//   exchange    : new value; exchange[0] is passed as the low half, exchange[1] as the high half
//   compare     : expected value, handed to the intrinsic as its comparand and result buffer
// Returns the intrinsic's result (cas_result) unchanged.
// NOTE(review): Microsoft documents that the intrinsic returns 1 on a successful swap,
// always overwrites the comparand buffer with the original destination value, and
// requires destination to be 16-byte aligned - confirm callers honour the alignment.
16 LFDS601_INLINE unsigned char lfds601_abstraction_dcas( volatile lfds601_atom_t *destination, lfds601_atom_t *exchange, lfds601_atom_t *compare )
\r
21 assert( destination != NULL );
\r
22 assert( exchange != NULL );
\r
23 assert( compare != NULL );
\r
// argument order of the intrinsic is (destination, exchange high, exchange low, comparand and result)
25 cas_result = _InterlockedCompareExchange128( (volatile __int64 *) destination, (__int64) *(exchange+1), (__int64) *exchange, (__int64 *) compare );
\r
27 return( cas_result );
\r
36 /****************************************************************************/
\r
37 #if (!defined _WIN64 && defined _WIN32 && defined _MSC_VER)
\r
39 /* TRD : 32 bit Windows (user-mode or kernel) on any CPU with the Microsoft C compiler
\r
41 (!defined _WIN64 && defined _WIN32) indicates 32 bit Windows
\r
42 _MSC_VER indicates Microsoft C compiler
\r
// Double-word compare-and-swap for 32 bit Windows, built on the MSVC
// _InterlockedCompareExchange64 intrinsic; the atoms behind each pointer are
// accessed as a single __int64 through pointer casts.
//   destination : the value to update (asserted non-NULL)
//   exchange    : new value handed to the intrinsic
//   compare     : expected value on entry; overwritten with the intrinsic's return value
// Returns 1 if the value written back into compare equals the value it held on entry
// (saved in original_compare), otherwise 0.
// NOTE(review): Microsoft documents the intrinsic's return value as the initial value of
// destination, so equality with the saved comparand means the swap took place - confirm.
45 LFDS601_INLINE unsigned char lfds601_abstraction_dcas( volatile lfds601_atom_t *destination, lfds601_atom_t *exchange, lfds601_atom_t *compare )
\r
50 assert( destination != NULL );
\r
51 assert( exchange != NULL );
\r
52 assert( compare != NULL );
\r
// remember the caller's comparand, because the next statement overwrites it
54 *(__int64 *) &original_compare = *(__int64 *) compare;
\r
56 *(__int64 *) compare = _InterlockedCompareExchange64( (volatile __int64 *) destination, *(__int64 *) exchange, *(__int64 *) compare );
\r
58 return( (unsigned char) (*(__int64 *) compare == *(__int64 *) &original_compare) );
\r
67 /****************************************************************************/
\r
68 #if (defined __x86_64__ && __GNUC__ && !defined __pic__)
\r
70 /* TRD : any OS on x64 with GCC for statically linked code
\r
72 __x86_64__ indicates x64
\r
73 __GNUC__ indicates GCC
\r
// Double-word compare-and-swap for x64 with GCC (statically linked code), using
// LOCK CMPXCHG16B on the two atoms at destination.
// Operand mapping, taken from the constraints below:
//   %0  "+m"  the two atoms at destination, read and written as memory
//   %1  "+a"  compare[0], in and out through RAX
//   %2  "+d"  compare[1], in and out through RDX
//   %3  "=q"  cas_result, written by SETZ
//       "b"   exchange[0] in RBX
//       "c"   exchange[1] in RCX
// All three pointers are asserted non-NULL.
// Returns cas_result: 1 if the compare-exchange set ZF, otherwise 0.
// NOTE(review): per the Intel SDM, a failed CMPXCHG16B loads the current destination
// into RDX:RAX (which is how compare receives the original value) and the memory
// operand must be 16-byte aligned - confirm callers guarantee that.
76 LFDS601_INLINE unsigned char lfds601_abstraction_dcas( volatile lfds601_atom_t *destination, lfds601_atom_t *exchange, lfds601_atom_t *compare )
\r
81 assert( destination != NULL );
\r
82 assert( exchange != NULL );
\r
83 assert( compare != NULL );
\r
85 __asm__ __volatile__
\r
87 "lock;" // make cmpxchg16b atomic
\r
88 "cmpxchg16b %0;" // cmpxchg16b sets ZF on success
\r
89 "setz %3;" // if ZF set, set cas_result to 1
\r
92 : "+m" (*(volatile lfds601_atom_t (*)[2]) destination), "+a" (*compare), "+d" (*(compare+1)), "=q" (cas_result)
\r
95 : "b" (*exchange), "c" (*(exchange+1))
\r
101 return( cas_result );
\r
111 /****************************************************************************/
\r
112 #if (defined __i686__ && __GNUC__ && !defined __pic__)
\r
114 /* TRD : any OS on x86 with GCC for statically linked code
\r
116 __i686__ indicates x86
\r
117 __GNUC__ indicates GCC
\r
// Double-word compare-and-swap for x86 with GCC (statically linked code), using
// LOCK CMPXCHG8B on the two atoms at destination.
// Operand mapping, taken from the constraints below:
//   %0  "+m"  the two atoms at destination, read and written as memory
//   %1  "+a"  compare[0], in and out through EAX
//   %2  "+d"  compare[1], in and out through EDX
//   %3  "=q"  cas_result, written by SETZ
//       "b"   exchange[0] in EBX
//       "c"   exchange[1] in ECX
// All three pointers are asserted non-NULL.
// Returns cas_result: 1 if the compare-exchange set ZF, otherwise 0.
// NOTE(review): per the Intel SDM, a failed CMPXCHG8B loads the current destination
// into EDX:EAX, which is how compare receives the original value - confirm.
120 LFDS601_INLINE unsigned char lfds601_abstraction_dcas( volatile lfds601_atom_t *destination, lfds601_atom_t *exchange, lfds601_atom_t *compare )
\r
125 assert( destination != NULL );
\r
126 assert( exchange != NULL );
\r
127 assert( compare != NULL );
\r
129 __asm__ __volatile__
\r
131 "lock;" // make cmpxchg8b atomic
\r
132 "cmpxchg8b %0;" // cmpxchg8b sets ZF on success
\r
133 "setz %3;" // if ZF set, set cas_result to 1
\r
136 : "+m" (*(volatile lfds601_atom_t (*)[2]) destination), "+a" (*compare), "+d" (*(compare+1)), "=q" (cas_result)
\r
139 : "b" (*exchange), "c" (*(exchange+1))
\r
145 return( cas_result );
\r
154 /****************************************************************************/
\r
155 #if (defined __x86_64__ && __GNUC__ && defined __pic__)
\r
157 /* TRD : any OS on x64 with GCC for position independent code (e.g. a shared object)
\r
159 __x86_64__ indicates x64
\r
160 __GNUC__ indicates GCC
\r
// Double-word compare-and-swap for x64 with GCC in position independent code, using
// LOCK CMPXCHG16B while leaving RBX intact for the surrounding code.
// Operand mapping, taken from the constraints below:
//   %0  "+m"  the two atoms at destination, read and written as memory
//   %1  "+a"  compare[0], in and out through RAX
//   %2  "+d"  compare[1], in and out through RDX
//   %3  "=q"  cas_result, written by SETZ
//       "S"   exchange[0] in RSI, swapped into RBX around the compare-exchange
//       "c"   exchange[1] in RCX
// All three pointers are asserted non-NULL.
// Returns cas_result: 1 if the compare-exchange set ZF, otherwise 0.
// NOTE(review): RSI is declared as an input-only operand yet is modified by the first
// xchg; the second xchg restores it. Also confirm the compiler can never address %0
// through RBX or RSI, since the swap would change the effective address.
163 LFDS601_INLINE unsigned char lfds601_abstraction_dcas( volatile lfds601_atom_t *destination, lfds601_atom_t *exchange, lfds601_atom_t *compare )
\r
168 assert( destination != NULL );
\r
169 assert( exchange != NULL );
\r
170 assert( compare != NULL );
\r
172 /* TRD : with a shared object, we cannot clobber RBX
\r
173 as such, we borrow RSI - we load half of the exchange value into it
\r
174 then swap it with RBX
\r
175 then do the compare-and-swap
\r
176 then swap the original value of RBX back from RSI
\r
179 __asm__ __volatile__
\r
181 "xchg %%rsi, %%rbx;" // swap RSI and RBX
\r
182 "lock;" // make cmpxchg16b atomic
\r
183 "cmpxchg16b %0;" // cmpxchg16b sets ZF on success
\r
184 "setz %3;" // if ZF set, set cas_result to 1
\r
185 "xchg %%rbx, %%rsi;" // re-swap RSI and RBX
\r
188 : "+m" (*(volatile lfds601_atom_t (*)[2]) destination), "+a" (*compare), "+d" (*(compare+1)), "=q" (cas_result)
\r
191 : "S" (*exchange), "c" (*(exchange+1))
\r
197 return( cas_result );
\r
207 /****************************************************************************/
\r
208 #if (defined __i686__ && __GNUC__ && defined __pic__)
\r
210 /* TRD : any OS on x86 with GCC for position independent code (e.g. a shared object)
\r
212 __i686__ indicates x86
\r
213 __GNUC__ indicates GCC
\r
// Double-word compare-and-swap for x86 with GCC in position independent code, using
// LOCK CMPXCHG8B while leaving EBX intact for the surrounding code.
// Operand mapping, taken from the constraints below:
//   %0  "+m"  the two atoms at destination, read and written as memory
//   %1  "+a"  compare[0], in and out through EAX
//   %2  "+d"  compare[1], in and out through EDX
//   %3  "=q"  cas_result, written by SETZ
//       "S"   exchange[0] in ESI, swapped into EBX around the compare-exchange
//       "c"   exchange[1] in ECX
// All three pointers are asserted non-NULL.
// Returns cas_result: 1 if the compare-exchange set ZF, otherwise 0.
// NOTE(review): ESI is declared as an input-only operand yet is modified by the first
// xchg; the second xchg restores it. Also confirm the compiler can never address %0
// through EBX or ESI, since the swap would change the effective address.
216 LFDS601_INLINE unsigned char lfds601_abstraction_dcas( volatile lfds601_atom_t *destination, lfds601_atom_t *exchange, lfds601_atom_t *compare )
\r
221 assert( destination != NULL );
\r
222 assert( exchange != NULL );
\r
223 assert( compare != NULL );
\r
225 /* TRD : with a shared object, we cannot clobber EBX
\r
226 as such, we borrow ESI - we load half of the exchange value into it
\r
227 then swap it with EBX
\r
228 then do the compare-and-swap
\r
229 then swap the original value of EBX back from ESI
\r
232 __asm__ __volatile__
\r
234 "xchg %%esi, %%ebx;" // swap ESI and EBX
\r
235 "lock;" // make cmpxchg8b atomic
\r
236 "cmpxchg8b %0;" // cmpxchg8b sets ZF on success
\r
237 "setz %3;" // if ZF set, set cas_result to 1
\r
238 "xchg %%ebx, %%esi;" // re-swap ESI and EBX
\r
241 : "+m" (*(volatile lfds601_atom_t (*)[2]) destination), "+a" (*compare), "+d" (*(compare+1)), "=q" (cas_result)
\r
244 : "S" (*exchange), "c" (*(exchange+1))
\r
250 return( cas_result );
\r
259 /****************************************************************************/
\r
260 #if (defined __arm__ && __GNUC__)
\r
262 /* TRD : any OS on any ARM with GCC
\r
264 Remember however we need to set into compare the original value of destination.
\r
266 __arm__ indicates ARM
\r
267 __GNUC__ indicates GCC
\r
// Double-word compare-and-swap for ARM with GCC, implemented as an LDREXD / STREXD
// sequence with a CP15 memory barrier issued before and after it.
//   destination : the two atoms to update (asserted non-NULL)
//   exchange    : new value, loaded into r2 and r3
//   compare     : expected value, loaded into r4 and r5 through the local_compare copy;
//                 the value read from destination (r6 and r7) is stored back through it
// Returns 1 if STREXD stored the exchange value (stored_flag is 0), otherwise 0;
// stored_flag is preset to 1 so that a failed comparison also returns 0.
270 LFDS601_INLINE unsigned char lfds601_abstraction_dcas( volatile lfds601_atom_t *destination, lfds601_atom_t *exchange, lfds601_atom_t *compare )
\r
273 *local_compare = compare,
\r
276 register lfds601_atom_t
\r
277 local_exchange_a __asm("r2"),
\r
278 local_exchange_b __asm("r3"),
\r
279 local_compare_a __asm("r4"),
\r
280 local_compare_b __asm("r5"),
\r
281 original_destination_a __asm("r6"),
\r
282 original_destination_b __asm("r7");
\r
284 assert( destination != NULL );
\r
285 assert( exchange != NULL );
\r
286 assert( compare != NULL );
\r
288 /* TRD : some notes
\r
290 the double word ldr and str instructions require contiguous registers
\r
291 where the first register is an even number
\r
293 honouring this requirement requires us to specifically specify
\r
294 the registers to use (which is why we're using register __asm("rN")
\r
295 in the declarations above)
\r
297 the arguments to the function occupy registers r0, r1 and r2
\r
299 we can use up to and including r8, but r9 can have a frame pointer in it
\r
301 so we make a copy of compare (freeing up r2, so we can use it for a double
\r
302 word load) but use destination (r0) and exchange (r1) directly
\r
304 note LDRD and STRD became available in armv6k
\r
306 apologies for the trickery with the mcr register variable - the code runs
\r
307 out of registers on armv6k
\r
310 __asm__ __volatile__
\r
312 " mov %[stored_flag], #1;" // put 1 into stored_flag
\r
313 " mov %[local_exchange_a], #0;" // borrow local_exchange_a for mcr, to save a register
\r
314 " mcr p15, 0, %[local_exchange_a], c7, c10, 5;" // memory barrier (ARM v6 compatible)
\r
315 " ldrd %[local_exchange_a], %[local_exchange_b], [%[exchange]];" // load exchange into local_exchange_a and local_exchange_b (which are r2 and r3, respectively)
\r
316 " ldrd %[local_compare_a], %[local_compare_b], [%[local_compare]];" // load compare into local_compare_a and local_compare_b (which are r4 and r5, respectively)
\r
318 " ldrexd %[original_destination_a], %[original_destination_b], [%[destination]];" // load destination into original_destination_a and original_destination_b (which are r6 and r7, respectively)
\r
319 " teq %[original_destination_a], %[local_compare_a];" // compare the first word of destination with the first word of compare
\r
320 " teqeq %[original_destination_b], %[local_compare_b];" // if they're equal, compare the second word of destination with the second word of compare
\r
321 " bne exit;" // if either word of destination does not match its respective word of compare, exit
\r
322 " strexd %[stored_flag], %[local_exchange_a], %[local_exchange_b], [%[destination]];" // if both words were equal, try to store local_exchange_a and local_exchange_b into *destination (on success, strexd puts 0 into stored_flag)
\r
323 " teq %[stored_flag], #0;" // check if stored_flag is 0
\r
324 " bne atomic_dcas;" // if not 0, retry (someone else touched *destination after we loaded but before we stored)
\r
326 " strd %[original_destination_a], %[original_destination_b], [%[local_compare]];" // whether or not the CAS swapped, we always write the original value of destination into *compare
\r
327 " mov %[local_exchange_a], #0;" // borrow local_exchange_a for mcr, to save a register
\r
328 " mcr p15, 0, %[local_exchange_a], c7, c10, 5;" // memory barrier (ARM v6 compatible)
\r
331 : "+m" (*(volatile lfds601_atom_t (*)[2]) destination), "+m" (*(lfds601_atom_t (*)[2]) local_compare),
\r
332 [stored_flag] "+&r" (stored_flag),
\r
333 [original_destination_a] "+&r" (original_destination_a), [original_destination_b] "+&r" (original_destination_b),
\r
334 [local_compare_a] "+&r" (local_compare_a), [local_compare_b] "+&r" (local_compare_b),
\r
335 [local_exchange_a] "+&r" (local_exchange_a), [local_exchange_b] "+&r" (local_exchange_b)
\r
338 : "m" (*(lfds601_atom_t (*)[2]) exchange),
\r
339 [destination] "r" (destination),
\r
340 [local_compare] "r" (local_compare),
\r
341 [exchange] "r" (exchange)
\r
344 : "cc", "memory" // memory is clobbered because we issue a memory barrier
\r
347 /* TRD : stored_flag is set to 0 on store, 1 on fail
\r
348 we need to return 1 on success, 0 on fail
\r
351 return( (unsigned char) !stored_flag );
\r