kmp_atomic.c

00001 /*
00002  * kmp_atomic.c -- ATOMIC implementation routines
00003  * $Revision: 42263 $
00004  * $Date: 2013-04-04 11:03:19 -0500 (Thu, 04 Apr 2013) $
00005  */
00006 
00007 /* <copyright>
00008     Copyright (c) 1997-2013 Intel Corporation.  All Rights Reserved.
00009 
00010     Redistribution and use in source and binary forms, with or without
00011     modification, are permitted provided that the following conditions
00012     are met:
00013 
00014       * Redistributions of source code must retain the above copyright
00015         notice, this list of conditions and the following disclaimer.
00016       * Redistributions in binary form must reproduce the above copyright
00017         notice, this list of conditions and the following disclaimer in the
00018         documentation and/or other materials provided with the distribution.
00019       * Neither the name of Intel Corporation nor the names of its
00020         contributors may be used to endorse or promote products derived
00021         from this software without specific prior written permission.
00022 
00023     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00024     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00025     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00026     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00027     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00028     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00029     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00030     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00031     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00032     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00033     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00034 
00035 
00036 ------------------------------------------------------------------------
00037 
00038     Portions of this software are protected under the following patents:
00039         U.S. Patent 5,812,852
00040         U.S. Patent 6,792,599
00041         U.S. Patent 7,069,556
00042         U.S. Patent 7,328,433
00043         U.S. Patent 7,500,242
00044 
00045 </copyright> */
00046 
00047 #include "kmp_atomic.h"
00048 #include "kmp.h"                  // TRUE, asm routines prototypes
00049 
00050 typedef unsigned char uchar;
00051 typedef unsigned short ushort;
00052 
00570 /*
00571  * Global vars
00572  */
00573 
00574 #ifndef KMP_GOMP_COMPAT
00575 int __kmp_atomic_mode = 1;      // Intel perf
00576 #else
00577 int __kmp_atomic_mode = 2;      // GOMP compatibility
00578 #endif /* KMP_GOMP_COMPAT */
00579 
00580 KMP_ALIGN(128)
00581 
00582 kmp_atomic_lock_t __kmp_atomic_lock;     /* Control access to all user coded atomics in Gnu compat mode   */
00583 kmp_atomic_lock_t __kmp_atomic_lock_1i;  /* Control access to all user coded atomics for 1-byte fixed data types */
00584 kmp_atomic_lock_t __kmp_atomic_lock_2i;  /* Control access to all user coded atomics for 2-byte fixed data types */
00585 kmp_atomic_lock_t __kmp_atomic_lock_4i;  /* Control access to all user coded atomics for 4-byte fixed data types */
00586 kmp_atomic_lock_t __kmp_atomic_lock_4r;  /* Control access to all user coded atomics for kmp_real32 data type    */
00587 kmp_atomic_lock_t __kmp_atomic_lock_8i;  /* Control access to all user coded atomics for 8-byte fixed data types */
00588 kmp_atomic_lock_t __kmp_atomic_lock_8r;  /* Control access to all user coded atomics for kmp_real64 data type    */
00589 kmp_atomic_lock_t __kmp_atomic_lock_8c;  /* Control access to all user coded atomics for 8-byte complex (kmp_cmplx32) data type */
00590 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type   */
00591 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type         */
00592 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
00593 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
00594 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
00595 
00596 
00597 /*
00598   2007-03-02:
00599   Without the "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG there is
00600   a bug on *_32 and *_32e. This is only a temporary workaround; the proper
00601   fix would be to rewrite the OP_CMPXCHG and MIN_MAX_CMPXCHG routines in
00602   assembly.
00603 */
00604 #define KMP_ATOMIC_VOLATILE volatile
00605 
00606 #if ( KMP_ARCH_X86 )
00607 
00608     static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
00609     static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
00610     static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
00611     static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
00612     static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
00613     static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
00614 
00615     static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
00616     static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
00617     static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
00618     static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
00619     static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
00620     static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
00621 
00622     static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
00623     static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
00624     static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
00625     static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
00626 
00627     static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
00628     static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
00629     static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
00630     static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
00631 
00632 #endif
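/*
    Note (descriptive, added for clarity): the Quad_a4_t / Quad_a16_t and
    kmp_cmplx128_a4_t / kmp_cmplx128_a16_t wrappers exist only to force a
    specific alignment of the underlying value, which is stored in their .q
    member.  The overloads above forward each arithmetic and comparison
    operator to that member, so the wrappers can be used directly by the
    OP_CRITICAL / MIN_MAX style macros defined below.
*/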
00633 
00634 /* ------------------------------------------------------------------------ */
00635 /* ATOMIC implementation routines                                           */
00636 /* one routine for each operation and operand type                          */
00637 /* ------------------------------------------------------------------------ */
00638 
00639 // All routine declarations look like
00640 // void __kmpc_atomic_RTYPE_OP( ident_t *id_ref, int gtid, TYPE *lhs, TYPE rhs );
00641 // ------------------------------------------------------------------------
00642 
00643 #define KMP_CHECK_GTID                                                    \
00644     if ( gtid == KMP_GTID_UNKNOWN ) {                                     \
00645         gtid = __kmp_entry_gtid();                                        \
00646     } // check and get gtid when needed
00647 
00648 // Beginning of a definition (provides name, parameters, debug trace)
00649 //     TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed-size integers)
00650 //     OP_ID   - operation identifier (add, sub, mul, ...)
00651 //     TYPE    - operands' type
00652 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
00653 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
00654 {                                                                                         \
00655     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
00656     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
00657 
00658 // ------------------------------------------------------------------------
00659 // Lock variables used for critical sections for various size operands
00660 #define ATOMIC_LOCK0   __kmp_atomic_lock       // all types, for Gnu compat
00661 #define ATOMIC_LOCK1i  __kmp_atomic_lock_1i    // char
00662 #define ATOMIC_LOCK2i  __kmp_atomic_lock_2i    // short
00663 #define ATOMIC_LOCK4i  __kmp_atomic_lock_4i    // long int
00664 #define ATOMIC_LOCK4r  __kmp_atomic_lock_4r    // float
00665 #define ATOMIC_LOCK8i  __kmp_atomic_lock_8i    // long long int
00666 #define ATOMIC_LOCK8r  __kmp_atomic_lock_8r    // double
00667 #define ATOMIC_LOCK8c  __kmp_atomic_lock_8c    // float complex
00668 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r   // long double
00669 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r   // _Quad
00670 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c   // double complex
00671 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c   // long double complex
00672 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c   // _Quad complex
00673 
00674 // ------------------------------------------------------------------------
00675 // Operation on *lhs, rhs bound by a critical section
00676 //     OP     - operator (expected to contain an assignment, e.g. +=)
00677 //     LCK_ID - lock identifier
00678 // Note: gtid is not checked here because it is expected to be valid;
00679 //       1- and 2-byte routines pass a valid gtid, other sizes check it before this macro
00680 #define OP_CRITICAL(OP,LCK_ID) \
00681     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );                    \
00682                                                                           \
00683     (*lhs) OP (rhs);                                                      \
00684                                                                           \
00685     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
00686 
00687 // ------------------------------------------------------------------------
00688 // For GNU compatibility, we may need to use a critical section,
00689 // even though it is not required by the ISA.
00690 //
00691 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
00692 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common 
00693 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
00694 // and add or compare and exchange.  Therefore, the FLAG parameter to this
00695 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
00696 // require a critical section, where we predict that they will be implemented
00697 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
00698 //
00699 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
00700 // the FLAG parameter should always be 1.  If we know that we will be using
00701 // a critical section, then we want to make certain that we use the generic
00702 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
00703 // locks that are specialized based upon the size or type of the data.
00704 //
00705 // If FLAG is 0, then we are relying on dead code elimination by the build
00706 // compiler to get rid of the useless block of code, and save a needless
00707 // branch at runtime.
00708 //
00709 
00710 #ifdef KMP_GOMP_COMPAT
00711 #define OP_GOMP_CRITICAL(OP,FLAG)                                         \
00712     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
00713         KMP_CHECK_GTID;                                                   \
00714         OP_CRITICAL( OP, 0 );                                             \
00715         return;                                                           \
00716     }
00717 #else
00718 #define OP_GOMP_CRITICAL(OP,FLAG)
00719 #endif /* KMP_GOMP_COMPAT */
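/*
    Illustrative expansion (a sketch only, not generated code): with
    KMP_GOMP_COMPAT defined, OP_GOMP_CRITICAL( +=, KMP_ARCH_X86 ) placed at the
    top of a routine expands to roughly

        if ( KMP_ARCH_X86 && ( __kmp_atomic_mode == 2 ) ) {
            if ( gtid == KMP_GTID_UNKNOWN ) { gtid = __kmp_entry_gtid(); }
            __kmp_acquire_atomic_lock( &__kmp_atomic_lock, gtid );  // LCK_ID 0: generic lock
            (*lhs) += (rhs);
            __kmp_release_atomic_lock( &__kmp_atomic_lock, gtid );
            return;
        }

    i.e. in GOMP-compatibility mode such routines fall back to the single
    generic lock __kmp_atomic_lock, which is what keeps them consistent with
    user code compiled to call GOMP_atomic_start() / GOMP_atomic_end().
*/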
00720 
00721 #if KMP_MIC
00722 
00723  #define KMP_DO_PAUSE _mm_delay_32( 30 )
00724 
00725  inline kmp_int32 __kmp_ex_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ) {
00726     return __sync_bool_compare_and_swap( p, cv, sv );
00727  }
00728  inline kmp_int32 __kmp_ex_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ) {
00729     return __sync_bool_compare_and_swap( p, cv, sv );
00730  }
00731 
00732  #define KMP_EX_COMPARE_AND_STORE_ACQ32 __kmp_ex_compare_and_store32
00733  #define KMP_EX_COMPARE_AND_STORE_ACQ64 __kmp_ex_compare_and_store64
00734 
00735 #else // KMP_MIC
00736 
00737  #define KMP_DO_PAUSE KMP_CPU_PAUSE()
00738 
00739  #define KMP_EX_COMPARE_AND_STORE_ACQ32 KMP_COMPARE_AND_STORE_ACQ32
00740  #define KMP_EX_COMPARE_AND_STORE_ACQ64 KMP_COMPARE_AND_STORE_ACQ64
00741 
00742 #endif // KMP_MIC
00743 
00744 #define KMP_EX_COMPARE_AND_STORE_ACQ8 KMP_COMPARE_AND_STORE_ACQ8
00745 #define KMP_EX_COMPARE_AND_STORE_ACQ16 KMP_COMPARE_AND_STORE_ACQ16
00746 
00747 // ------------------------------------------------------------------------
00748 // Operation on *lhs, rhs using "compare_and_store" routine
00749 //     TYPE    - operands' type
00750 //     BITS    - size in bits, used to distinguish low level calls
00751 //     OP      - operator
00752 // Note: temp_val introduced in order to force the compiler to read
00753 //       *lhs only once (w/o it the compiler reads *lhs twice)
00754 #define OP_CMPXCHG(TYPE,BITS,OP)                                          \
00755     {                                                                     \
00756         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
00757         TYPE old_value, new_value;                                        \
00758         temp_val = *lhs;                                                  \
00759         old_value = temp_val;                                             \
00760         new_value = old_value OP rhs;                                     \
00761         while ( ! KMP_EX_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
00762                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
00763                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
00764         {                                                                 \
00765             KMP_DO_PAUSE;                                                 \
00766                                                                           \
00767             temp_val = *lhs;                                              \
00768             old_value = temp_val;                                         \
00769             new_value = old_value OP rhs;                                 \
00770         }                                                                 \
00771     }
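/*
    Illustrative expansion (a sketch only): OP_CMPXCHG( kmp_real64, 64, + ) is a
    classic compare-and-swap retry loop.  The value is re-read on every
    iteration, the new value is computed, and the 8-byte image of old_value is
    swapped for the image of new_value only if *lhs still holds the old bits:

        kmp_real64 volatile temp_val;
        kmp_real64 old_value, new_value;
        temp_val  = *lhs;                 // single read of *lhs per iteration
        old_value = temp_val;
        new_value = old_value + rhs;
        while ( ! KMP_EX_COMPARE_AND_STORE_ACQ64( (kmp_int64 *) lhs,
                      *VOLATILE_CAST(kmp_int64 *) &old_value,
                      *VOLATILE_CAST(kmp_int64 *) &new_value ) )
        {
            KMP_DO_PAUSE;
            temp_val  = *lhs;
            old_value = temp_val;
            new_value = old_value + rhs;
        }
*/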
00772 
00773 // 2007-06-25:
00774 // Workaround for C78287 (complex(kind=4) data type).
00775 // lin_32, lin_32e, win_32 and win_32e are affected (verified in the generated asm).
00776 // The compiler ignores the volatile qualifier on temp_val in the OP_CMPXCHG
00777 // macro; this is a compiler defect.
00778 // Related tracker is C76005, targeted for 11.0.
00779 // The asm of the workaround has also been verified.
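// How the workaround works (descriptive note): instead of relying on the
// volatile temp_val, the macro below keeps the TYPE value in the .cmp field of
// a local struct and touches its bytes only through the .vvv pointer, which is
// typed kmp_int##BITS *.  All loads and the compare-and-store therefore go
// through the integer view, which the affected compilers do not optimize away.
// The "char anonym[...]" declaration is a compile-time size assertion: its
// length is 0 (triggering a diagnostic) unless sizeof(TYPE) == sizeof(kmp_int##BITS).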
00780 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP)                               \
00781     {                                                                     \
00782     char anonym[ ( sizeof( TYPE ) == sizeof( kmp_int##BITS ) ) ? ( 1 ) : ( 0 ) ] = { 1 }; \
00783     struct _sss {                                                     \
00784         TYPE            cmp;                                          \
00785         kmp_int##BITS   *vvv;                                         \
00786     };                                                                \
00787         struct _sss old_value, new_value;                                 \
00788         old_value.vvv = ( kmp_int##BITS * )&old_value.cmp;                \
00789         new_value.vvv = ( kmp_int##BITS * )&new_value.cmp;                \
00790         *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs;              \
00791         new_value.cmp = old_value.cmp OP rhs;                             \
00792         while ( ! KMP_EX_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
00793                       *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,      \
00794                       *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) )   \
00795         {                                                                 \
00796             KMP_DO_PAUSE;                                                 \
00797                                                                           \
00798         *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs;          \
00799         new_value.cmp = old_value.cmp OP rhs;                         \
00800         }                                                                 \
00801     }
00802 // end of the first part of the workaround for C78287
00803 
00804 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
00805 
00806 // ------------------------------------------------------------------------
00807 // X86 or X86_64: no alignment problems ====================================
00808 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
00809 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
00810     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
00811     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */      \
00812     KMP_TEST_THEN_ADD##BITS( lhs, OP rhs );                                \
00813 }
00814 // -------------------------------------------------------------------------
00815 #define ATOMIC_FLOAT_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
00816 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
00817     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
00818     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */      \
00819     __kmp_test_then_add_real##BITS( lhs, OP rhs );                         \
00820 }
00821 // -------------------------------------------------------------------------
00822 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
00823 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
00824     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
00825     OP_CMPXCHG(TYPE,BITS,OP)                                               \
00826 }
00827 // -------------------------------------------------------------------------
00828 // workaround for C78287 (complex(kind=4) data type)
00829 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
00830 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                                 \
00831     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                 \
00832     OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP)                                               \
00833 }
00834 // end of the second part of the workaround for C78287
00835 
00836 #else
00837 // -------------------------------------------------------------------------
00838 // Code for other architectures that don't handle unaligned accesses.
00839 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
00840 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
00841     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
00842     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                           \
00843         /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */  \
00844         KMP_TEST_THEN_ADD##BITS( lhs, OP rhs );                            \
00845     } else {                                                               \
00846         KMP_CHECK_GTID;                                                    \
00847         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */  \
00848     }                                                                      \
00849 }
00850 // -------------------------------------------------------------------------
00851 #define ATOMIC_FLOAT_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
00852 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
00853     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
00854     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                           \
00855         OP_CMPXCHG(TYPE,BITS,OP)     /* aligned address */                 \
00856     } else {                                                               \
00857         KMP_CHECK_GTID;                                                    \
00858         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */  \
00859     }                                                                      \
00860 }
00861 // -------------------------------------------------------------------------
00862 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
00863 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
00864     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
00865     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                           \
00866         OP_CMPXCHG(TYPE,BITS,OP)     /* aligned address */                 \
00867     } else {                                                               \
00868         KMP_CHECK_GTID;                                                    \
00869         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */  \
00870     }                                                                      \
00871 }
00872 // -------------------------------------------------------------------------
00873 // workaround for C78287 (complex(kind=4) data type)
00874 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
00875 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                                 \
00876     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                 \
00877     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                                      \
00878         OP_CMPXCHG(TYPE,BITS,OP)             /* aligned address */                    \
00879     } else {                                                                          \
00880         KMP_CHECK_GTID;                                                               \
00881         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */             \
00882     }                                                                                 \
00883 }
00884 // end of the second part of the workaround for C78287
00885 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
00886 
00887 // Routines for ATOMIC 4-byte operands addition and subtraction
00888 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32,  32, +, 4i, 3, 0            )  // __kmpc_atomic_fixed4_add
00889 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32,  32, -, 4i, 3, 0            )  // __kmpc_atomic_fixed4_sub
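/*
    Illustrative expansion (a sketch only, not generated output): on IA-32 and
    Intel(R) 64 the entry ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 )
    above expands to roughly

        void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid,
                                       kmp_int32 *lhs, kmp_int32 rhs )
        {
            KMP_DEBUG_ASSERT( __kmp_init_serial );
            KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
            // GOMP_FLAG is 0 here, so the OP_GOMP_CRITICAL branch is dead code
            KMP_TEST_THEN_ADD32( lhs, + rhs );   // atomic fetch-and-add
        }

    The "sub" entry reuses the same fetch-and-add primitive: OP is "-", so the
    expansion passes the negated operand, KMP_TEST_THEN_ADD32( lhs, - rhs ).
*/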
00890 
00891 #if KMP_MIC
00892 ATOMIC_CMPXCHG( float4,  add, kmp_real32, 32, +,  4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_add
00893 ATOMIC_CMPXCHG( float4,  sub, kmp_real32, 32, -,  4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub
00894 #else
00895 ATOMIC_FLOAT_ADD( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_add
00896 ATOMIC_FLOAT_ADD( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub
00897 #endif // KMP_MIC
00898 
00899 // Routines for ATOMIC 8-byte operands addition and subtraction
00900 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64,  64, +, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_add
00901 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64,  64, -, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_sub
00902 
00903 #if KMP_MIC
00904 ATOMIC_CMPXCHG( float8,  add, kmp_real64, 64, +,  8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_add
00905 ATOMIC_CMPXCHG( float8,  sub, kmp_real64, 64, -,  8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub
00906 #else
00907 ATOMIC_FLOAT_ADD( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_add
00908 ATOMIC_FLOAT_ADD( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub
00909 #endif // KMP_MIC
00910 
00911 // ------------------------------------------------------------------------
00912 // Entries definition for integer operands
00913 //     TYPE_ID - operands type and size (fixed4, float4)
00914 //     OP_ID   - operation identifier (add, sub, mul, ...)
00915 //     TYPE    - operand type
00916 //     BITS    - size in bits, used to distinguish low level calls
00917 //     OP      - operator (used in critical section)
00918 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
00919 //     MASK    - used for alignment check
00920 
00921 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
00922 // ------------------------------------------------------------------------
00923 // Routines for ATOMIC integer operands, other operators
00924 // ------------------------------------------------------------------------
00925 //              TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, MASK, GOMP_FLAG
00926 ATOMIC_CMPXCHG( fixed1,  add, kmp_int8,    8, +,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_add
00927 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8,    8, &,  1i, 0, 0            )  // __kmpc_atomic_fixed1_andb
00928 ATOMIC_CMPXCHG( fixed1,  div, kmp_int8,    8, /,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_div
00929 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8,   8, /,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_div
00930 ATOMIC_CMPXCHG( fixed1,  mul, kmp_int8,    8, *,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_mul
00931 ATOMIC_CMPXCHG( fixed1,  orb, kmp_int8,    8, |,  1i, 0, 0            )  // __kmpc_atomic_fixed1_orb
00932 ATOMIC_CMPXCHG( fixed1,  shl, kmp_int8,    8, <<, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shl
00933 ATOMIC_CMPXCHG( fixed1,  shr, kmp_int8,    8, >>, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shr
00934 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8,   8, >>, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_shr
00935 ATOMIC_CMPXCHG( fixed1,  sub, kmp_int8,    8, -,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_sub
00936 ATOMIC_CMPXCHG( fixed1,  xor, kmp_int8,    8, ^,  1i, 0, 0            )  // __kmpc_atomic_fixed1_xor
00937 ATOMIC_CMPXCHG( fixed2,  add, kmp_int16,  16, +,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_add
00938 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16,  16, &,  2i, 1, 0            )  // __kmpc_atomic_fixed2_andb
00939 ATOMIC_CMPXCHG( fixed2,  div, kmp_int16,  16, /,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_div
00940 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_div
00941 ATOMIC_CMPXCHG( fixed2,  mul, kmp_int16,  16, *,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_mul
00942 ATOMIC_CMPXCHG( fixed2,  orb, kmp_int16,  16, |,  2i, 1, 0            )  // __kmpc_atomic_fixed2_orb
00943 ATOMIC_CMPXCHG( fixed2,  shl, kmp_int16,  16, <<, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shl
00944 ATOMIC_CMPXCHG( fixed2,  shr, kmp_int16,  16, >>, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shr
00945 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_shr
00946 ATOMIC_CMPXCHG( fixed2,  sub, kmp_int16,  16, -,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_sub
00947 ATOMIC_CMPXCHG( fixed2,  xor, kmp_int16,  16, ^,  2i, 1, 0            )  // __kmpc_atomic_fixed2_xor
00948 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32,  32, &,  4i, 3, 0            )  // __kmpc_atomic_fixed4_andb
00949 ATOMIC_CMPXCHG( fixed4,  div, kmp_int32,  32, /,  4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_div
00950 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /,  4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_div
00951 ATOMIC_CMPXCHG( fixed4,  mul, kmp_int32,  32, *,  4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_mul
00952 ATOMIC_CMPXCHG( fixed4,  orb, kmp_int32,  32, |,  4i, 3, 0            )  // __kmpc_atomic_fixed4_orb
00953 ATOMIC_CMPXCHG( fixed4,  shl, kmp_int32,  32, <<, 4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shl
00954 ATOMIC_CMPXCHG( fixed4,  shr, kmp_int32,  32, >>, 4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shr
00955 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_shr
00956 ATOMIC_CMPXCHG( fixed4,  xor, kmp_int32,  32, ^,  4i, 3, 0            )  // __kmpc_atomic_fixed4_xor
00957 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64,  64, &,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_andb
00958 ATOMIC_CMPXCHG( fixed8,  div, kmp_int64,  64, /,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_div
00959 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_div
00960 ATOMIC_CMPXCHG( fixed8,  mul, kmp_int64,  64, *,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_mul
00961 ATOMIC_CMPXCHG( fixed8,  orb, kmp_int64,  64, |,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_orb
00962 ATOMIC_CMPXCHG( fixed8,  shl, kmp_int64,  64, <<, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shl
00963 ATOMIC_CMPXCHG( fixed8,  shr, kmp_int64,  64, >>, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shr
00964 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_shr
00965 ATOMIC_CMPXCHG( fixed8,  xor, kmp_int64,  64, ^,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_xor
00966 ATOMIC_CMPXCHG( float4,  div, kmp_real32, 32, /,  4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_div
00967 ATOMIC_CMPXCHG( float4,  mul, kmp_real32, 32, *,  4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_mul
00968 ATOMIC_CMPXCHG( float8,  div, kmp_real64, 64, /,  8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_div
00969 ATOMIC_CMPXCHG( float8,  mul, kmp_real64, 64, *,  8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_mul
00970 //              TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, MASK, GOMP_FLAG
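/*
    Usage sketch (hypothetical caller, for illustration only): a compiler
    translating

        #pragma omp atomic
        x *= y;              // x, y of type kmp_int32

    may emit a call equivalent to

        __kmpc_atomic_fixed4_mul( &loc, gtid, &x, y );

    where loc is an ident_t describing the source location and gtid is the
    caller's global thread id (e.g. the value returned by
    __kmpc_global_thread_num).  The entry then performs the update with the
    compare-and-exchange loop generated by ATOMIC_CMPXCHG above.
*/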
00971 
00972 
00973 /* ------------------------------------------------------------------------ */
00974 /* Routines for C/C++ Reduction operators && and ||                         */
00975 /* ------------------------------------------------------------------------ */
00976 
00977 // ------------------------------------------------------------------------
00978 // Separate macros are needed for && and || because C has no compound-assignment form of them
00979 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
00980 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)             \
00981 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
00982     OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG )                              \
00983     OP_CRITICAL( = *lhs OP, LCK_ID )                                      \
00984 }
00985 
00986 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
00987 
00988 // ------------------------------------------------------------------------
00989 // X86 or X86_64: no alignment problems ===================================
00990 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
00991 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
00992     OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG )                              \
00993     OP_CMPXCHG(TYPE,BITS,OP)                                              \
00994 }
00995 
00996 #else
00997 // ------------------------------------------------------------------------
00998 // Code for other architectures that don't handle unaligned accesses.
00999 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
01000 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
01001     OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG)                                 \
01002     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                          \
01003         OP_CMPXCHG(TYPE,BITS,OP)       /* aligned address */              \
01004     } else {                                                              \
01005         KMP_CHECK_GTID;                                                   \
01006         OP_CRITICAL(= *lhs OP,LCK_ID)  /* unaligned - use critical */     \
01007     }                                                                     \
01008 }
01009 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
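// Note on the OP argument (descriptive): because C has no "&&=" or "||=", the
// assignment prefix is passed explicitly.  Inside OP_CRITICAL the argument
// "= *lhs &&" turns "(*lhs) OP (rhs);" into "(*lhs) = *lhs && (rhs);", while
// OP_CMPXCHG receives the bare "&&" and computes "new_value = old_value && rhs"
// before the compare-and-store.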
01010 
01011 ATOMIC_CMPX_L( fixed1, andl, char,       8, &&, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_andl
01012 ATOMIC_CMPX_L( fixed1,  orl, char,       8, ||, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_orl
01013 ATOMIC_CMPX_L( fixed2, andl, short,     16, &&, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_andl
01014 ATOMIC_CMPX_L( fixed2,  orl, short,     16, ||, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_orl
01015 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 )             // __kmpc_atomic_fixed4_andl
01016 ATOMIC_CMPX_L( fixed4,  orl, kmp_int32, 32, ||, 4i, 3, 0 )             // __kmpc_atomic_fixed4_orl
01017 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_andl
01018 ATOMIC_CMPX_L( fixed8,  orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_orl
01019 
01020 
01021 /* ------------------------------------------------------------------------- */
01022 /* Routines for Fortran operators that have no C counterpart:                 */
01023 /* MAX, MIN, .EQV., .NEQV.                                                   */
01024 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
01025 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
01026 /* ------------------------------------------------------------------------- */
01027 
01028 // -------------------------------------------------------------------------
01029 // MIN and MAX need separate macros
01030 //     OP - comparison operator used to check whether the update is still needed
01031 #define MIN_MAX_CRITSECT(OP,LCK_ID)                                        \
01032     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );                     \
01033                                                                            \
01034     if ( *lhs OP rhs ) {                 /* still need actions? */         \
01035         *lhs = rhs;                                                        \
01036     }                                                                      \
01037     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
01038 
01039 // -------------------------------------------------------------------------
01040 #ifdef KMP_GOMP_COMPAT
01041 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)                                     \
01042     if (( FLAG ) && ( __kmp_atomic_mode == 2 )) {                          \
01043         KMP_CHECK_GTID;                                                    \
01044         MIN_MAX_CRITSECT( OP, 0 );                                         \
01045         return;                                                            \
01046     }
01047 #else
01048 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
01049 #endif /* KMP_GOMP_COMPAT */
01050 
01051 // -------------------------------------------------------------------------
01052 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP)                                      \
01053     {                                                                      \
01054         TYPE KMP_ATOMIC_VOLATILE temp_val;                                 \
01055         TYPE old_value;                                                    \
01056         temp_val = *lhs;                                                   \
01057         old_value = temp_val;                                              \
01058         while ( old_value OP rhs &&          /* still need actions? */     \
01059             ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs,      \
01060                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,          \
01061                       *VOLATILE_CAST(kmp_int##BITS *) &rhs ) )             \
01062         {                                                                  \
01063             KMP_CPU_PAUSE();                                               \
01064             temp_val = *lhs;                                               \
01065             old_value = temp_val;                                          \
01066         }                                                                  \
01067     }
01068 
01069 // -------------------------------------------------------------------------
01070 // 1-byte, 2-byte operands - use critical section
01071 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)           \
01072 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
01073     if ( *lhs OP rhs ) {     /* need actions? */                           \
01074         GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG)                                \
01075         MIN_MAX_CRITSECT(OP,LCK_ID)                                        \
01076     }                                                                      \
01077 }
01078 
01079 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
01080 
01081 // -------------------------------------------------------------------------
01082 // X86 or X86_64: no alignment problems ====================================
01083 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
01084 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
01085     if ( *lhs OP rhs ) {                                                   \
01086         GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG)                                \
01087         MIN_MAX_CMPXCHG(TYPE,BITS,OP)                                      \
01088     }                                                                      \
01089 }
01090 
01091 #else
01092 // -------------------------------------------------------------------------
01093 // Code for other architectures that don't handle unaligned accesses.
01094 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
01095 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
01096     if ( *lhs OP rhs ) {                                                   \
01097         GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG)                                \
01098         if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                       \
01099             MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */            \
01100         } else {                                                           \
01101             KMP_CHECK_GTID;                                                \
01102             MIN_MAX_CRITSECT(OP,LCK_ID)   /* unaligned address */          \
01103         }                                                                  \
01104     }                                                                      \
01105 }
01106 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
01107 
01108 MIN_MAX_COMPXCHG( fixed1,  max, char,        8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
01109 MIN_MAX_COMPXCHG( fixed1,  min, char,        8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
01110 MIN_MAX_COMPXCHG( fixed2,  max, short,      16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
01111 MIN_MAX_COMPXCHG( fixed2,  min, short,      16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
01112 MIN_MAX_COMPXCHG( fixed4,  max, kmp_int32,  32, <, 4i, 3, 0 )            // __kmpc_atomic_fixed4_max
01113 MIN_MAX_COMPXCHG( fixed4,  min, kmp_int32,  32, >, 4i, 3, 0 )            // __kmpc_atomic_fixed4_min
01114 MIN_MAX_COMPXCHG( fixed8,  max, kmp_int64,  64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
01115 MIN_MAX_COMPXCHG( fixed8,  min, kmp_int64,  64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
01116 MIN_MAX_COMPXCHG( float4,  max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
01117 MIN_MAX_COMPXCHG( float4,  min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
01118 MIN_MAX_COMPXCHG( float8,  max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
01119 MIN_MAX_COMPXCHG( float8,  min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
01120 MIN_MAX_CRITICAL( float16, max,     QUAD_LEGACY,      <, 16r,   1 )            // __kmpc_atomic_float16_max
01121 MIN_MAX_CRITICAL( float16, min,     QUAD_LEGACY,      >, 16r,   1 )            // __kmpc_atomic_float16_min
01122 #if ( KMP_ARCH_X86 )
01123     MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t,     <, 16r,   1 )            // __kmpc_atomic_float16_max_a16
01124     MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t,     >, 16r,   1 )            // __kmpc_atomic_float16_min_a16
01125 #endif
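/*
    Note on the OP argument (illustrative): for "max" the comparison passed is
    "<", so the update fires only while ( *lhs < rhs ); for "min" it is ">".
    For example, __kmpc_atomic_fixed4_max( id, gtid, &x, 7 ) with x == 3 retries
    KMP_COMPARE_AND_STORE_ACQ32 until x holds 7, unless another thread has
    already stored a value >= 7, in which case nothing is written.
*/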
01126 // ------------------------------------------------------------------------
01127 // A separate macro is needed for .EQV. because it requires a complement (~)
01128 // OP is ignored for critical sections; ^=~ is used instead
01129 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)           \
01130 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
01131     OP_GOMP_CRITICAL(^=~,GOMP_FLAG)  /* send assignment */                \
01132     OP_CRITICAL(^=~,LCK_ID)    /* send assignment and complement */       \
01133 }
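/*
    Why "^=~" implements .EQV. (descriptive note): a .EQV. b is the complement
    of a .NEQV. b, i.e. ~(a ^ b).  Because xor with an all-ones word flips every
    bit, ~(*lhs ^ rhs) == *lhs ^ ~rhs, so the atomic update "*lhs ^= ~rhs" is
    exactly "*lhs = *lhs .EQV. rhs".  .NEQV. is a plain xor, which is why the
    neqv entries below simply reuse ATOMIC_CMPXCHG with "^".
*/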
01134 
01135 // ------------------------------------------------------------------------
01136 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
01137 // ------------------------------------------------------------------------
01138 // X86 or X86_64: no alignment problems ===================================
01139 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
01140 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
01141     OP_GOMP_CRITICAL(^=~,GOMP_FLAG)  /* send assignment */                \
01142     OP_CMPXCHG(TYPE,BITS,OP)                                              \
01143 }
01144 // ------------------------------------------------------------------------
01145 #else
01146 // ------------------------------------------------------------------------
01147 // Code for other architectures that don't handle unaligned accesses.
01148 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
01149 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
01150     OP_GOMP_CRITICAL(^=~,GOMP_FLAG)                                       \
01151     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                          \
01152         OP_CMPXCHG(TYPE,BITS,OP)   /* aligned address */                  \
01153     } else {                                                              \
01154         KMP_CHECK_GTID;                                                   \
01155         OP_CRITICAL(^=~,LCK_ID)    /* unaligned address - use critical */ \
01156     }                                                                     \
01157 }
01158 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
01159 
01160 ATOMIC_CMPXCHG(  fixed1, neqv, kmp_int8,   8,   ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
01161 ATOMIC_CMPXCHG(  fixed2, neqv, kmp_int16, 16,   ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
01162 ATOMIC_CMPXCHG(  fixed4, neqv, kmp_int32, 32,   ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
01163 ATOMIC_CMPXCHG(  fixed8, neqv, kmp_int64, 64,   ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
01164 ATOMIC_CMPX_EQV( fixed1, eqv,  kmp_int8,   8,  ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
01165 ATOMIC_CMPX_EQV( fixed2, eqv,  kmp_int16, 16,  ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
01166 ATOMIC_CMPX_EQV( fixed4, eqv,  kmp_int32, 32,  ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
01167 ATOMIC_CMPX_EQV( fixed8, eqv,  kmp_int64, 64,  ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
01168 
01169 
01170 // ------------------------------------------------------------------------
01171 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
01172 //     TYPE_ID, OP_ID, TYPE - detailed above
01173 //     OP      - operator
01174 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
01175 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)           \
01176 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
01177     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)  /* send assignment */              \
01178     OP_CRITICAL(OP##=,LCK_ID)          /* send assignment */              \
01179 }
01180 
01181 /* ------------------------------------------------------------------------- */
01182 // routines for long double type
01183 ATOMIC_CRITICAL( float10, add, long double,     +, 10r,   1 )            // __kmpc_atomic_float10_add
01184 ATOMIC_CRITICAL( float10, sub, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub
01185 ATOMIC_CRITICAL( float10, mul, long double,     *, 10r,   1 )            // __kmpc_atomic_float10_mul
01186 ATOMIC_CRITICAL( float10, div, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div
01187 // routines for _Quad type
01188 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY,     +, 16r,   1 )            // __kmpc_atomic_float16_add
01189 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub
01190 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY,     *, 16r,   1 )            // __kmpc_atomic_float16_mul
01191 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div
01192 #if ( KMP_ARCH_X86 )
01193     ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 )           // __kmpc_atomic_float16_add_a16
01194     ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 )           // __kmpc_atomic_float16_sub_a16
01195     ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 )           // __kmpc_atomic_float16_mul_a16
01196     ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 )           // __kmpc_atomic_float16_div_a16
01197 #endif
01198 // routines for complex types
01199 
01200 // workaround for C78287 (complex(kind=4) data type)
01201 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_add
01202 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_sub
01203 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_mul
01204 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_div
01205 // end of the workaround for C78287
01206 
01207 ATOMIC_CRITICAL( cmplx8,  add, kmp_cmplx64,     +, 16c,   1 )            // __kmpc_atomic_cmplx8_add
01208 ATOMIC_CRITICAL( cmplx8,  sub, kmp_cmplx64,     -, 16c,   1 )            // __kmpc_atomic_cmplx8_sub
01209 ATOMIC_CRITICAL( cmplx8,  mul, kmp_cmplx64,     *, 16c,   1 )            // __kmpc_atomic_cmplx8_mul
01210 ATOMIC_CRITICAL( cmplx8,  div, kmp_cmplx64,     /, 16c,   1 )            // __kmpc_atomic_cmplx8_div
01211 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80,     +, 20c,   1 )            // __kmpc_atomic_cmplx10_add
01212 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80,     -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub
01213 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80,     *, 20c,   1 )            // __kmpc_atomic_cmplx10_mul
01214 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80,     /, 20c,   1 )            // __kmpc_atomic_cmplx10_div
01215 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG,     +, 32c,   1 )            // __kmpc_atomic_cmplx16_add
01216 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG,     -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub
01217 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG,     *, 32c,   1 )            // __kmpc_atomic_cmplx16_mul
01218 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG,     /, 32c,   1 )            // __kmpc_atomic_cmplx16_div
01219 #if ( KMP_ARCH_X86 )
01220     ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 )   // __kmpc_atomic_cmplx16_add_a16
01221     ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 )   // __kmpc_atomic_cmplx16_sub_a16
01222     ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 )   // __kmpc_atomic_cmplx16_mul_a16
01223     ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 )   // __kmpc_atomic_cmplx16_div_a16
01224 #endif
01225 
01226 #if OMP_40_ENABLED
01227 
01228 // OpenMP 4.0: x = expr binop x for non-commutative operations.
01229 // Supported only on IA-32 architecture and Intel(R) 64
01230 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
01231 
01232 // ------------------------------------------------------------------------
01233 // Operation on *lhs, rhs bound by critical section
01234 //     OP     - operator (it's supposed to contain an assignment)
01235 //     LCK_ID - lock identifier
01236 // Note: don't check gtid as it should always be valid
01237 // 1, 2-byte - expect valid parameter, other - check before this macro
01238 #define OP_CRITICAL_REV(OP,LCK_ID) \
01239     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
01240                                                                           \
01241     (*lhs) = (rhs) OP (*lhs);                                             \
01242                                                                           \
01243     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
01244 
01245 #ifdef KMP_GOMP_COMPAT
01246 #define OP_GOMP_CRITICAL_REV(OP,FLAG)                                     \
01247     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
01248         KMP_CHECK_GTID;                                                   \
01249         OP_CRITICAL_REV( OP, 0 );                                         \
01250         return;                                                           \
01251     }
01252 #else
01253 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
01254 #endif /* KMP_GOMP_COMPAT */
01255 
01256 
01257 // Beginning of a definition (provides name, parameters, debug trace)
01258 //     TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed-size integers)
01259 //     OP_ID   - operation identifier (add, sub, mul, ...)
01260 //     TYPE    - operands' type
01261 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
01262 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
01263 {                                                                                         \
01264     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
01265     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
01266 
01267 // ------------------------------------------------------------------------
01268 // Operation on *lhs, rhs using "compare_and_store" routine
01269 //     TYPE    - operands' type
01270 //     BITS    - size in bits, used to distinguish low level calls
01271 //     OP      - operator
01272 // Note: temp_val introduced in order to force the compiler to read
01273 //       *lhs only once (w/o it the compiler reads *lhs twice)
01274 #define OP_CMPXCHG_REV(TYPE,BITS,OP)                                      \
01275     {                                                                     \
01276         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
01277         TYPE old_value, new_value;                                        \
01278         temp_val = *lhs;                                                  \
01279         old_value = temp_val;                                             \
01280         new_value = rhs OP old_value;                                     \
01281         while ( ! KMP_EX_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
01282                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
01283                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
01284         {                                                                 \
01285             KMP_DO_PAUSE;                                                 \
01286                                                                           \
01287             temp_val = *lhs;                                              \
01288             old_value = temp_val;                                         \
01289             new_value = rhs OP old_value;                                 \
01290         }                                                                 \
01291     }
01292 
01293 // -------------------------------------------------------------------------
01294 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG)   \
01295 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void)                                 \
01296     OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG)                                    \
01297     OP_CMPXCHG_REV(TYPE,BITS,OP)                                          \
01298 }
01299 
01300 // ------------------------------------------------------------------------
01301 // Entries definition for integer operands
01302 //     TYPE_ID - operands type and size (fixed4, float4)
01303 //     OP_ID   - operation identifier (add, sub, mul, ...)
01304 //     TYPE    - operand type
01305 //     BITS    - size in bits, used to distinguish low level calls
01306 //     OP      - operator (used in critical section)
01307 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
01308 
01309 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,GOMP_FLAG
01310 // ------------------------------------------------------------------------
01311 // Routines for ATOMIC integer operands, other operators
01312 // ------------------------------------------------------------------------
01313 //                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
01314 ATOMIC_CMPXCHG_REV( fixed1,  div, kmp_int8,    8, /,  1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_div_rev
01315 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8,   8, /,  1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_div_rev
01316 ATOMIC_CMPXCHG_REV( fixed1,  shl, kmp_int8,    8, <<, 1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shl_rev
01317 ATOMIC_CMPXCHG_REV( fixed1,  shr, kmp_int8,    8, >>, 1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shr_rev
01318 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8,   8, >>, 1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_shr_rev
01319 ATOMIC_CMPXCHG_REV( fixed1,  sub, kmp_int8,    8, -,  1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_sub_rev
01320 
01321 ATOMIC_CMPXCHG_REV( fixed2,  div, kmp_int16,  16, /,  2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_div_rev
01322 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /,  2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_div_rev
01323 ATOMIC_CMPXCHG_REV( fixed2,  shl, kmp_int16,  16, <<, 2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shl_rev
01324 ATOMIC_CMPXCHG_REV( fixed2,  shr, kmp_int16,  16, >>, 2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shr_rev
01325 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_shr_rev
01326 ATOMIC_CMPXCHG_REV( fixed2,  sub, kmp_int16,  16, -,  2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_sub_rev
01327 
01328 ATOMIC_CMPXCHG_REV( fixed4,  div, kmp_int32,  32, /,  4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_div_rev
01329 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /,  4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_div_rev
01330 ATOMIC_CMPXCHG_REV( fixed4,  shl, kmp_int32,  32, <<, 4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shl_rev
01331 ATOMIC_CMPXCHG_REV( fixed4,  shr, kmp_int32,  32, >>, 4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shr_rev
01332 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_shr_rev
01333 ATOMIC_CMPXCHG_REV( fixed4,  sub, kmp_int32,  32, -,  4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_sub_rev
01334 
01335 ATOMIC_CMPXCHG_REV( fixed8,  div, kmp_int64,  64, /,  8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_div_rev
01336 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /,  8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_div_rev
01337 ATOMIC_CMPXCHG_REV( fixed8,  shl, kmp_int64,  64, <<, 8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shl_rev
01338 ATOMIC_CMPXCHG_REV( fixed8,  shr, kmp_int64,  64, >>, 8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shr_rev
01339 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_shr_rev
01340 ATOMIC_CMPXCHG_REV( fixed8,  sub, kmp_int64,  64, -,  8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_sub_rev
01341 
01342 ATOMIC_CMPXCHG_REV( float4,  div, kmp_real32, 32, /,  4r, KMP_ARCH_X86 )  // __kmpc_atomic_float4_div_rev
01343 ATOMIC_CMPXCHG_REV( float4,  sub, kmp_real32, 32, -,  4r, KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub_rev
01344 
01345 ATOMIC_CMPXCHG_REV( float8,  div, kmp_real64, 64, /,  8r, KMP_ARCH_X86 )  // __kmpc_atomic_float8_div_rev
01346 ATOMIC_CMPXCHG_REV( float8,  sub, kmp_real64, 64, -,  8r, KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub_rev
01347 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
01348 
01349 // ------------------------------------------------------------------------
01350 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
01351 //     TYPE_ID, OP_ID, TYPE - detailed above
01352 //     OP      - operator
01353 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
01354 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)           \
01355 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void)                                     \
01356     OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG)                                        \
01357     OP_CRITICAL_REV(OP,LCK_ID)                                                \
01358 }
01359 
01360 /* ------------------------------------------------------------------------- */
01361 // routines for long double type
01362 ATOMIC_CRITICAL_REV( float10, sub, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub_rev
01363 ATOMIC_CRITICAL_REV( float10, div, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div_rev
01364 // routines for _Quad type
01365 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub_rev
01366 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div_rev
01367 #if ( KMP_ARCH_X86 )
01368     ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 )           // __kmpc_atomic_float16_sub_a16_rev
01369     ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 )           // __kmpc_atomic_float16_div_a16_rev
01370 #endif
01371 
01372 // routines for complex types
01373 ATOMIC_CRITICAL_REV( cmplx4,  sub, kmp_cmplx32,     -, 8c,    1 )            // __kmpc_atomic_cmplx4_sub_rev
01374 ATOMIC_CRITICAL_REV( cmplx4,  div, kmp_cmplx32,     /, 8c,    1 )            // __kmpc_atomic_cmplx4_div_rev
01375 ATOMIC_CRITICAL_REV( cmplx8,  sub, kmp_cmplx64,     -, 16c,   1 )            // __kmpc_atomic_cmplx8_sub_rev
01376 ATOMIC_CRITICAL_REV( cmplx8,  div, kmp_cmplx64,     /, 16c,   1 )            // __kmpc_atomic_cmplx8_div_rev
01377 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80,     -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub_rev
01378 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80,     /, 20c,   1 )            // __kmpc_atomic_cmplx10_div_rev
01379 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG,     -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub_rev
01380 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG,     /, 32c,   1 )            // __kmpc_atomic_cmplx16_div_rev
01381 #if ( KMP_ARCH_X86 )
01382     ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 )   // __kmpc_atomic_cmplx16_sub_a16_rev
01383     ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 )   // __kmpc_atomic_cmplx16_div_a16_rev
01384 #endif
01385 
01386 
01387 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
01388 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
01389 
01390 #endif //OMP_40_ENABLED
01391 
01392 
01393 /* ------------------------------------------------------------------------ */
01394 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
01395 /* Note: in order to reduce the total number of type combinations, the      */
01396 /*       compiler is assumed to convert the RHS to the longest floating     */
01397 /*       type, that is _Quad, before calling any of these routines.         */
01398 /* The conversion to _Quad is done by the compiler during the calculation,  */
01399 /*    and the conversion back to TYPE happens before the assignment, like:  */
01400 /*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
01401 /* A performance penalty is expected because of the software emulation.     */
01402 /* ------------------------------------------------------------------------ */
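
// Illustration only (not generated code): roughly how the compiler is
// expected to use these mixed-type entry points.  For
//     float x;  long double d;
//     #pragma omp atomic
//     x /= d;
// the right-hand side is widened to _Quad and the emitted call is
// approximately the following (id_ref and gtid are supplied by the
// compiler / runtime; the exact calling sequence is compiler-dependent):
#if 0
    __kmpc_atomic_float4_div_fp( id_ref, gtid, &x, (_Quad) d );   // x = (float)( (_Quad)x / (_Quad)d )
#endif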
01403 
01404 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                             \
01405 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
01406 {                                                                                                       \
01407     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                              \
01408     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
01409 
01410 // -------------------------------------------------------------------------
01411 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG)         \
01412 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                       \
01413     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)  /* send assignment */                              \
01414     OP_CRITICAL(OP##=,LCK_ID)  /* send assignment */                                      \
01415 }
01416 
01417 // -------------------------------------------------------------------------
01418 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
01419 // -------------------------------------------------------------------------
01420 // X86 or X86_64: no alignment problems ====================================
01421 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
01422 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                         \
01423     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                       \
01424     OP_CMPXCHG(TYPE,BITS,OP)                                                                \
01425 }
01426 // -------------------------------------------------------------------------
01427 #else
01428 // ------------------------------------------------------------------------
01429 // Code for other architectures that don't handle unaligned accesses.
01430 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
01431 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                         \
01432     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                       \
01433     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                                            \
01434         OP_CMPXCHG(TYPE,BITS,OP)     /* aligned address */                                  \
01435     } else {                                                                                \
01436         KMP_CHECK_GTID;                                                                     \
01437         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */                   \
01438     }                                                                                       \
01439 }
01440 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
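
// Note on the MASK parameter (illustration, not generated code): 0x##MASK is
// an alignment mask for the LHS type, e.g. MASK=7 checks the low three
// address bits of an 8-byte operand.  A minimal sketch of the test:
#if 0
    kmp_real64 value = 0.0;
    kmp_real64 *lhs = &value;
    if ( ! ( (kmp_uintptr_t) lhs & 0x7 ) ) {
        /* 8-byte aligned - the cmpxchg-based fast path is safe */
    } else {
        /* unaligned - fall back to the lock-protected critical section */
    }
#endif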
01441 
01442 // RHS=float8
01443 ATOMIC_CMPXCHG_MIX( fixed1, char,       mul,  8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
01444 ATOMIC_CMPXCHG_MIX( fixed1, char,       div,  8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
01445 ATOMIC_CMPXCHG_MIX( fixed2, short,      mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
01446 ATOMIC_CMPXCHG_MIX( fixed2, short,      div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
01447 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32,  mul, 32, *, float8, kmp_real64, 4i, 3, 0 )            // __kmpc_atomic_fixed4_mul_float8
01448 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32,  div, 32, /, float8, kmp_real64, 4i, 3, 0 )            // __kmpc_atomic_fixed4_div_float8
01449 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64,  mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
01450 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64,  div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
01451 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
01452 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
01453 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
01454 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
01455 
01456 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
01457 
01458 ATOMIC_CMPXCHG_MIX( fixed1,  char,       add,  8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
01459 ATOMIC_CMPXCHG_MIX( fixed1,  char,       sub,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
01460 ATOMIC_CMPXCHG_MIX( fixed1,  char,       mul,  8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
01461 ATOMIC_CMPXCHG_MIX( fixed1,  char,       div,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
01462 ATOMIC_CMPXCHG_MIX( fixed1u, uchar,      div,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
01463 
01464 ATOMIC_CMPXCHG_MIX( fixed2,  short,      add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
01465 ATOMIC_CMPXCHG_MIX( fixed2,  short,      sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
01466 ATOMIC_CMPXCHG_MIX( fixed2,  short,      mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
01467 ATOMIC_CMPXCHG_MIX( fixed2,  short,      div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
01468 ATOMIC_CMPXCHG_MIX( fixed2u, ushort,     div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
01469 
01470 ATOMIC_CMPXCHG_MIX( fixed4,  kmp_int32,  add, 32, +, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_add_fp
01471 ATOMIC_CMPXCHG_MIX( fixed4,  kmp_int32,  sub, 32, -, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_sub_fp
01472 ATOMIC_CMPXCHG_MIX( fixed4,  kmp_int32,  mul, 32, *, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_mul_fp
01473 ATOMIC_CMPXCHG_MIX( fixed4,  kmp_int32,  div, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_div_fp
01474 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_div_fp
01475 
01476 ATOMIC_CMPXCHG_MIX( fixed8,  kmp_int64,  add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
01477 ATOMIC_CMPXCHG_MIX( fixed8,  kmp_int64,  sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
01478 ATOMIC_CMPXCHG_MIX( fixed8,  kmp_int64,  mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
01479 ATOMIC_CMPXCHG_MIX( fixed8,  kmp_int64,  div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
01480 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
01481 
01482 ATOMIC_CMPXCHG_MIX( float4,  kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
01483 ATOMIC_CMPXCHG_MIX( float4,  kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
01484 ATOMIC_CMPXCHG_MIX( float4,  kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
01485 ATOMIC_CMPXCHG_MIX( float4,  kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
01486 
01487 ATOMIC_CMPXCHG_MIX( float8,  kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
01488 ATOMIC_CMPXCHG_MIX( float8,  kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
01489 ATOMIC_CMPXCHG_MIX( float8,  kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
01490 ATOMIC_CMPXCHG_MIX( float8,  kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
01491 
01492 ATOMIC_CRITICAL_FP( float10, long double,    add, +, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_add_fp
01493 ATOMIC_CRITICAL_FP( float10, long double,    sub, -, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_sub_fp
01494 ATOMIC_CRITICAL_FP( float10, long double,    mul, *, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_mul_fp
01495 ATOMIC_CRITICAL_FP( float10, long double,    div, /, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_div_fp
01496 
01497 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
01498 // ------------------------------------------------------------------------
01499 // X86 or X86_64: no alignment problems ====================================
01500 // workaround for C78287 (complex(kind=4) data type)
01501 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
01502 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                           \
01503     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                         \
01504     OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP)                                                       \
01505 }
01506 // end of the second part of the workaround for C78287
01507 #else
01508 // ------------------------------------------------------------------------
01509 // Code for other architectures that don't handle unaligned accesses.
01510 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
01511 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                           \
01512     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                         \
01513     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                                              \
01514         OP_CMPXCHG(TYPE,BITS,OP)     /* aligned address */                                    \
01515     } else {                                                                                  \
01516         KMP_CHECK_GTID;                                                                       \
01517         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */                     \
01518     }                                                                                         \
01519 }
01520 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
01521 
01522 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8,  kmp_cmplx64,  8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
01523 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8,  kmp_cmplx64,  8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
01524 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8,  kmp_cmplx64,  8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
01525 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8,  kmp_cmplx64,  8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
01526 
01527 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
01528 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
01529 
01531 // ------------------------------------------------------------------------
01532 // Atomic READ routines
01533 // ------------------------------------------------------------------------
01534 
01535 // ------------------------------------------------------------------------
01536 // Beginning of a definition (provides name, parameters, debug trace)
01537 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
01538 //     OP_ID   - operation identifier (add, sub, mul, ...)
01539 //     TYPE    - operands' type
01540 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
01541 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
01542 {                                                                                   \
01543     KMP_DEBUG_ASSERT( __kmp_init_serial );                                          \
01544     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
01545 
01546 // ------------------------------------------------------------------------
01547 // Atomic read of *loc using the "compare_and_store_ret" routine
01548 //     TYPE    - operands' type
01549 //     BITS    - size in bits, used to distinguish low level calls
01550 //     OP      - operator
01551 // Note: temp_val introduced in order to force the compiler to read
01552 //       *loc only once (w/o it the compiler reads *loc twice)
01553 // TODO: check if it is still necessary
01554 // Return old value regardless of the result of the "compare & swap" operation
01555 
01556 #define OP_CMPXCHG_READ(TYPE,BITS,OP)                                     \
01557     {                                                                     \
01558         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
01559         union f_i_union {                                                 \
01560             TYPE f_val;                                                   \
01561             kmp_int##BITS i_val;                                          \
01562         };                                                                \
01563         union f_i_union old_value;                                        \
01564         temp_val = *loc;                                                  \
01565         old_value.f_val = temp_val;                                       \
01566         old_value.i_val = __kmp_compare_and_store_ret##BITS( (kmp_int##BITS *) loc, \
01567                       *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val,   \
01568                       *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
01569         new_value = old_value.f_val;                                      \
01570         return new_value;                                                 \
01571     }
01572 
01573 // -------------------------------------------------------------------------
01574 // Atomic read of *loc bound by critical section
01575 //     OP     - operator (it's supposed to contain an assignment)
01576 //     LCK_ID - lock identifier
01577 // Note: don't check gtid as it should always be valid
01578 // 1, 2-byte - expect valid parameter, other - check before this macro
01579 #define OP_CRITICAL_READ(OP,LCK_ID)                                       \
01580     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );                    \
01581                                                                           \
01582     new_value = (*loc);                                                   \
01583                                                                           \
01584     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
01585 
01586 // -------------------------------------------------------------------------
01587 #ifdef KMP_GOMP_COMPAT
01588 #define OP_GOMP_CRITICAL_READ(OP,FLAG)                                    \
01589     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
01590         KMP_CHECK_GTID;                                                   \
01591         OP_CRITICAL_READ( OP, 0 );                                        \
01592         return new_value;                                                 \
01593     }
01594 #else
01595 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
01596 #endif /* KMP_GOMP_COMPAT */
01597 
01598 // -------------------------------------------------------------------------
01599 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)           \
01600 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE)                                \
01601     TYPE new_value;                                                       \
01602     OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG)                                \
01603     new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 );                     \
01604     return new_value;                                                     \
01605 }
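
// Illustration only (not generated code): ATOMIC_FIXED_READ obtains an
// atomic read by atomically adding "OP 0" (i.e. zero) to the location and
// returning the value that the fetch-and-add observed.  With the
// GOMP-compatibility path omitted, a rough expansion is:
#if 0
kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 * loc )
{
    kmp_int32 new_value;
    new_value = KMP_TEST_THEN_ADD32( loc, + 0 );  // atomic fetch-and-add of zero == atomic load
    return new_value;
}
#endif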
01606 // -------------------------------------------------------------------------
01607 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)         \
01608 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE)                                \
01609     TYPE new_value;                                                       \
01610     OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG)                                \
01611     OP_CMPXCHG_READ(TYPE,BITS,OP)                                         \
01612 }
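
// Illustration only (not generated code): a rough expansion of
// ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, ... ).  The
// "new_value" used inside OP_CMPXCHG_READ is the one declared here by the
// enclosing macro, and passing the same snapshot as both the expected and
// the replacement value turns the compare-and-store into a pure atomic
// load: whatever the hardware finds in *loc is returned.  (VOLATILE_CAST
// details and the GOMP-compatibility path are omitted.)
#if 0
kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc )
{
    kmp_real64 new_value;
    union f_i_union {
        kmp_real64 f_val;
        kmp_int64  i_val;
    };
    union f_i_union old_value;
    old_value.f_val = *loc;                       // unsynchronized snapshot
    old_value.i_val = __kmp_compare_and_store_ret64( (kmp_int64 *) loc,
                          old_value.i_val, old_value.i_val );
    new_value = old_value.f_val;
    return new_value;
}
#endif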
01613 // ------------------------------------------------------------------------
01614 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
01615 //     TYPE_ID, OP_ID, TYPE - detailed above
01616 //     OP      - operator
01617 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
01618 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)      \
01619 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE)                                \
01620     TYPE new_value;                                                       \
01621     OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG)  /* send assignment */         \
01622     OP_CRITICAL_READ(OP,LCK_ID)          /* send assignment */            \
01623     return new_value;                                                     \
01624 }
01625 
01626 // ------------------------------------------------------------------------
01627 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
01628 // Let's return the read value through the additional parameter.
01629 
01630 #if ( KMP_OS_WINDOWS )
01631 
01632 #define OP_CRITICAL_READ_WRK(OP,LCK_ID)                                   \
01633     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );                    \
01634                                                                           \
01635     (*out) = (*loc);                                                      \
01636                                                                           \
01637     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
01638 // ------------------------------------------------------------------------
01639 #ifdef KMP_GOMP_COMPAT
01640 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)                                \
01641     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
01642         KMP_CHECK_GTID;                                                   \
01643         OP_CRITICAL_READ_WRK( OP, 0 );                                    \
01644     }
01645 #else
01646 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
01647 #endif /* KMP_GOMP_COMPAT */
01648 // ------------------------------------------------------------------------
01649 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
01650 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
01651 {                                                                                   \
01652     KMP_DEBUG_ASSERT( __kmp_init_serial );                                          \
01653     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
01654 
01655 // ------------------------------------------------------------------------
01656 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)      \
01657 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE)                                     \
01658     OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG)  /* send assignment */         \
01659     OP_CRITICAL_READ_WRK(OP,LCK_ID)          /* send assignment */            \
01660 }
01661 
01662 #endif // KMP_OS_WINDOWS
01663 
01664 // ------------------------------------------------------------------------
01665 //                  TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
01666 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32,  32, +, 0            )      // __kmpc_atomic_fixed4_rd
01667 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64,  64, +, KMP_ARCH_X86 )      // __kmpc_atomic_fixed8_rd
01668 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 )    // __kmpc_atomic_float4_rd
01669 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 )    // __kmpc_atomic_float8_rd
01670 
01671 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
01672 ATOMIC_CMPXCHG_READ( fixed1,  rd, kmp_int8,    8, +,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_rd
01673 ATOMIC_CMPXCHG_READ( fixed2,  rd, kmp_int16,  16, +,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_rd
01674 
01675 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r,   1 )         // __kmpc_atomic_float10_rd
01676 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r,   1 )         // __kmpc_atomic_float16_rd
01677 
01678 // Fix for CQ220361 on Windows* OS
01679 #if ( KMP_OS_WINDOWS )
01680     ATOMIC_CRITICAL_READ_WRK( cmplx4,  rd, kmp_cmplx32, +,  8c, 1 )   // __kmpc_atomic_cmplx4_rd
01681 #else
01682     ATOMIC_CRITICAL_READ( cmplx4,  rd, kmp_cmplx32, +,  8c, 1 )       // __kmpc_atomic_cmplx4_rd
01683 #endif
01684 ATOMIC_CRITICAL_READ( cmplx8,  rd, kmp_cmplx64, +, 16c, 1 )           // __kmpc_atomic_cmplx8_rd
01685 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 )           // __kmpc_atomic_cmplx10_rd
01686 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 )           // __kmpc_atomic_cmplx16_rd
01687 #if ( KMP_ARCH_X86 )
01688     ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 )         // __kmpc_atomic_float16_a16_rd
01689     ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
01690 #endif
01691 
01692 
01693 // ------------------------------------------------------------------------
01694 // Atomic WRITE routines
01695 // ------------------------------------------------------------------------
01696 
01697 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)              \
01698 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
01699     OP_GOMP_CRITICAL(OP,GOMP_FLAG)                                        \
01700     __kmp_xchg_fixed##BITS( lhs, rhs );                                   \
01701 }
01702 // ------------------------------------------------------------------------
01703 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)        \
01704 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
01705     OP_GOMP_CRITICAL(OP,GOMP_FLAG)                                        \
01706     __kmp_xchg_real##BITS( lhs, rhs );                                    \
01707 }
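
// Illustration only (not generated code): a rough expansion of
// ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, ... ), assuming ATOMIC_BEGIN
// provides the usual (id_ref, gtid, lhs, rhs) parameter list.  The write is
// a single atomic exchange; the previous contents of *lhs are discarded and
// the GOMP-compatibility critical path is omitted here.
#if 0
void __kmpc_atomic_fixed4_wr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs )
{
    __kmp_xchg_fixed32( lhs, rhs );               // atomic exchange, old value ignored
}
#endif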
01708 
01709 
01710 // ------------------------------------------------------------------------
01711 // Operation on *lhs, rhs using "compare_and_store" routine
01712 //     TYPE    - operands' type
01713 //     BITS    - size in bits, used to distinguish low level calls
01714 //     OP      - operator
01715 // Note: temp_val introduced in order to force the compiler to read
01716 //       *lhs only once (w/o it the compiler reads *lhs twice)
01717 #define OP_CMPXCHG_WR(TYPE,BITS,OP)                                       \
01718     {                                                                     \
01719         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
01720         TYPE old_value, new_value;                                        \
01721         temp_val = *lhs;                                                  \
01722         old_value = temp_val;                                             \
01723         new_value = rhs;                                                  \
01724         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
01725                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
01726                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
01727         {                                                                 \
01728             KMP_CPU_PAUSE();                                              \
01729                                                                           \
01730             temp_val = *lhs;                                              \
01731             old_value = temp_val;                                         \
01732             new_value = rhs;                                              \
01733         }                                                                 \
01734     }
01735 
01736 // -------------------------------------------------------------------------
01737 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)           \
01738 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
01739     OP_GOMP_CRITICAL(OP,GOMP_FLAG)                                        \
01740     OP_CMPXCHG_WR(TYPE,BITS,OP)                                           \
01741 }
01742 
01743 // ------------------------------------------------------------------------
01744 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
01745 //     TYPE_ID, OP_ID, TYPE - detailed above
01746 //     OP      - operator
01747 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
01748 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)        \
01749 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
01750     OP_GOMP_CRITICAL(OP,GOMP_FLAG)       /* send assignment */            \
01751     OP_CRITICAL(OP,LCK_ID)               /* send assignment */            \
01752 }
01753 // -------------------------------------------------------------------------
01754 
01755 ATOMIC_XCHG_WR( fixed1,  wr, kmp_int8,    8, =,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_wr
01756 ATOMIC_XCHG_WR( fixed2,  wr, kmp_int16,  16, =,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_wr
01757 ATOMIC_XCHG_WR( fixed4,  wr, kmp_int32,  32, =,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_wr
01758 #if ( KMP_ARCH_X86 )
01759     ATOMIC_CMPXCHG_WR( fixed8,  wr, kmp_int64,  64, =,  KMP_ARCH_X86 )      // __kmpc_atomic_fixed8_wr
01760 #else
01761     ATOMIC_XCHG_WR( fixed8,  wr, kmp_int64,  64, =,  KMP_ARCH_X86 )         // __kmpc_atomic_fixed8_wr
01762 #endif
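// Note: on IA-32 (KMP_ARCH_X86) the 8-byte write above uses the
// compare-and-store loop, presumably because a 64-bit operand cannot be
// exchanged in a single instruction on that architecture; 64-bit targets
// use the plain atomic exchange instead.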
01763 
01764 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 )         // __kmpc_atomic_float4_wr
01765 #if ( KMP_ARCH_X86 )
01766     ATOMIC_CMPXCHG_WR( float8,  wr, kmp_real64,  64, =,  KMP_ARCH_X86 )     // __kmpc_atomic_float8_wr
01767 #else
01768     ATOMIC_XCHG_FLOAT_WR( float8,  wr, kmp_real64,  64, =,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_wr
01769 #endif
01770 
01771 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r,   1 )         // __kmpc_atomic_float10_wr
01772 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r,   1 )         // __kmpc_atomic_float16_wr
01773 ATOMIC_CRITICAL_WR( cmplx4,  wr, kmp_cmplx32, =,  8c,   1 )         // __kmpc_atomic_cmplx4_wr
01774 ATOMIC_CRITICAL_WR( cmplx8,  wr, kmp_cmplx64, =, 16c,   1 )         // __kmpc_atomic_cmplx8_wr
01775 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c,   1 )         // __kmpc_atomic_cmplx10_wr
01776 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c,   1 )         // __kmpc_atomic_cmplx16_wr
01777 #if ( KMP_ARCH_X86 )
01778     ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t,         =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
01779     ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
01780 #endif
01781 
01782 
01783 // ------------------------------------------------------------------------
01784 // Atomic CAPTURE routines
01785 // ------------------------------------------------------------------------
01786 
01787 // Beginning of a definition (provides name, parameters, debug trace)
01788 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
01789 //     OP_ID   - operation identifier (add, sub, mul, ...)
01790 //     TYPE    - operands' type
01791 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE)                                    \
01792 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
01793 {                                                                                         \
01794     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
01795     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
01796 
01797 // -------------------------------------------------------------------------
01798 // Operation on *lhs, rhs bound by critical section
01799 //     OP     - operator (it's supposed to contain an assignment)
01800 //     LCK_ID - lock identifier
01801 // Note: don't check gtid as it should always be valid
01802 // 1, 2-byte - expect valid parameter, other - check before this macro
01803 #define OP_CRITICAL_CPT(OP,LCK_ID)                                        \
01804     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
01805                                                                           \
01806     if( flag ) {                                                          \
01807         (*lhs) OP rhs;                                                    \
01808         new_value = (*lhs);                                               \
01809     } else {                                                              \
01810         new_value = (*lhs);                                               \
01811         (*lhs) OP rhs;                                                    \
01812     }                                                                     \
01813                                                                           \
01814     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
01815     return new_value;
01816 
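// Illustration only (not generated code): the "flag" argument selects which
// OpenMP capture form is being compiled:
//     { v = x; x = x + expr; }  ->  flag == 0, the old value is returned
//     { x = x + expr; v = x; }  ->  flag != 0, the new value is returned
// so a compiler-emitted call for  v = (x += 5)  on a 32-bit int looks
// approximately like the following (id_ref and gtid are supplied by the
// compiler / runtime):
#if 0
    kmp_int32 x = 0, v;
    v = __kmpc_atomic_fixed4_add_cpt( id_ref, gtid, &x, 5, 1 /* capture the updated value */ );
#endif
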
01817 // ------------------------------------------------------------------------
01818 #ifdef KMP_GOMP_COMPAT
01819 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)                                     \
01820     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
01821         KMP_CHECK_GTID;                                                   \
01822         OP_CRITICAL_CPT( OP##=, 0 );                                      \
01823     }
01824 #else
01825 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
01826 #endif /* KMP_GOMP_COMPAT */
01827 
01828 // ------------------------------------------------------------------------
01829 // Operation on *lhs, rhs using "compare_and_store" routine
01830 //     TYPE    - operands' type
01831 //     BITS    - size in bits, used to distinguish low level calls
01832 //     OP      - operator
01833 // Note: temp_val introduced in order to force the compiler to read
01834 //       *lhs only once (w/o it the compiler reads *lhs twice)
01835 #define OP_CMPXCHG_CPT(TYPE,BITS,OP)                                      \
01836     {                                                                     \
01837         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
01838         TYPE old_value, new_value;                                        \
01839         temp_val = *lhs;                                                  \
01840         old_value = temp_val;                                             \
01841         new_value = old_value OP rhs;                                     \
01842         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
01843                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
01844                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
01845         {                                                                 \
01846             KMP_CPU_PAUSE();                                              \
01847                                                                           \
01848             temp_val = *lhs;                                              \
01849             old_value = temp_val;                                         \
01850             new_value = old_value OP rhs;                                 \
01851         }                                                                 \
01852         if( flag ) {                                                      \
01853             return new_value;                                             \
01854         } else                                                            \
01855             return old_value;                                             \
01856     }
01857 
01858 // -------------------------------------------------------------------------
01859 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)           \
01860 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
01861     TYPE new_value;                                                        \
01862     OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG)                                     \
01863     OP_CMPXCHG_CPT(TYPE,BITS,OP)                                           \
01864 }
01865 
01866 // -------------------------------------------------------------------------
01867 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)         \
01868 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
01869     TYPE old_value, new_value;                                             \
01870     OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG)                                     \
01871     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */      \
01872     old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs );                    \
01873     if( flag ) {                                                           \
01874         return old_value OP rhs;                                           \
01875     } else                                                                 \
01876         return old_value;                                                  \
01877 }
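
// Illustration only (not generated code): a rough expansion of
// ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ).  The OP
// token acts as a sign, so subtraction becomes a fetch-and-add of -rhs and
// the captured result is rebuilt from the returned old value.  (The
// GOMP-compatibility path is omitted.)
#if 0
kmp_int32 __kmpc_atomic_fixed4_sub_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag )
{
    kmp_int32 old_value;
    old_value = KMP_TEST_THEN_ADD32( lhs, - rhs );   // atomic fetch-and-add of the negated rhs
    if ( flag )
        return old_value - rhs;                      // value after the update
    else
        return old_value;                            // value before the update
}
#endif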
01878 // -------------------------------------------------------------------------
01879 #define ATOMIC_FLOAT_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)         \
01880 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
01881     TYPE old_value, new_value;                                             \
01882     OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG)                                     \
01883     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */      \
01884     old_value = __kmp_test_then_add_real##BITS( lhs, OP rhs );             \
01885     if( flag ) {                                                           \
01886         return old_value OP rhs;                                           \
01887     } else                                                                 \
01888         return old_value;                                                  \
01889 }
01890 // -------------------------------------------------------------------------
01891 
01892 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32,  32, +, 0            )  // __kmpc_atomic_fixed4_add_cpt
01893 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32,  32, -, 0            )  // __kmpc_atomic_fixed4_sub_cpt
01894 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64,  64, +, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_add_cpt
01895 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64,  64, -, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_sub_cpt
01896 
01897 #if KMP_MIC
01898 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 )  // __kmpc_atomic_float4_add_cpt
01899 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub_cpt
01900 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 )  // __kmpc_atomic_float8_add_cpt
01901 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub_cpt
01902 #else
01903 ATOMIC_FLOAT_ADD_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 )  // __kmpc_atomic_float4_add_cpt
01904 ATOMIC_FLOAT_ADD_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub_cpt
01905 ATOMIC_FLOAT_ADD_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 )  // __kmpc_atomic_float8_add_cpt
01906 ATOMIC_FLOAT_ADD_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub_cpt
01907 #endif // KMP_MIC
01908 
01909 // ------------------------------------------------------------------------
01910 // Entries definition for integer operands
01911 //     TYPE_ID - operands type and size (fixed4, float4)
01912 //     OP_ID   - operation identifier (add, sub, mul, ...)
01913 //     TYPE    - operand type
01914 //     BITS    - size in bits, used to distinguish low level calls
01915 //     OP      - operator (used in critical section)
01916 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
01917 // ------------------------------------------------------------------------
01918 // Routines for ATOMIC integer operands, other operators
01919 // ------------------------------------------------------------------------
01920 //              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
01921 ATOMIC_CMPXCHG_CPT( fixed1,  add_cpt, kmp_int8,    8, +,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_add_cpt
01922 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8,    8, &,  0            )  // __kmpc_atomic_fixed1_andb_cpt
01923 ATOMIC_CMPXCHG_CPT( fixed1,  div_cpt, kmp_int8,    8, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_div_cpt
01924 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8,   8, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_div_cpt
01925 ATOMIC_CMPXCHG_CPT( fixed1,  mul_cpt, kmp_int8,    8, *,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_mul_cpt
01926 ATOMIC_CMPXCHG_CPT( fixed1,  orb_cpt, kmp_int8,    8, |,  0            )  // __kmpc_atomic_fixed1_orb_cpt
01927 ATOMIC_CMPXCHG_CPT( fixed1,  shl_cpt, kmp_int8,    8, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shl_cpt
01928 ATOMIC_CMPXCHG_CPT( fixed1,  shr_cpt, kmp_int8,    8, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shr_cpt
01929 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8,   8, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_shr_cpt
01930 ATOMIC_CMPXCHG_CPT( fixed1,  sub_cpt, kmp_int8,    8, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_sub_cpt
01931 ATOMIC_CMPXCHG_CPT( fixed1,  xor_cpt, kmp_int8,    8, ^,  0            )  // __kmpc_atomic_fixed1_xor_cpt
01932 ATOMIC_CMPXCHG_CPT( fixed2,  add_cpt, kmp_int16,  16, +,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_add_cpt
01933 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16,  16, &,  0            )  // __kmpc_atomic_fixed2_andb_cpt
01934 ATOMIC_CMPXCHG_CPT( fixed2,  div_cpt, kmp_int16,  16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_div_cpt
01935 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_div_cpt
01936 ATOMIC_CMPXCHG_CPT( fixed2,  mul_cpt, kmp_int16,  16, *,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_mul_cpt
01937 ATOMIC_CMPXCHG_CPT( fixed2,  orb_cpt, kmp_int16,  16, |,  0            )  // __kmpc_atomic_fixed2_orb_cpt
01938 ATOMIC_CMPXCHG_CPT( fixed2,  shl_cpt, kmp_int16,  16, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shl_cpt
01939 ATOMIC_CMPXCHG_CPT( fixed2,  shr_cpt, kmp_int16,  16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shr_cpt
01940 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_shr_cpt
01941 ATOMIC_CMPXCHG_CPT( fixed2,  sub_cpt, kmp_int16,  16, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_sub_cpt
01942 ATOMIC_CMPXCHG_CPT( fixed2,  xor_cpt, kmp_int16,  16, ^,  0            )  // __kmpc_atomic_fixed2_xor_cpt
01943 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32,  32, &,  0            )  // __kmpc_atomic_fixed4_andb_cpt
01944 ATOMIC_CMPXCHG_CPT( fixed4,  div_cpt, kmp_int32,  32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_div_cpt
01945 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_div_cpt
01946 ATOMIC_CMPXCHG_CPT( fixed4,  mul_cpt, kmp_int32,  32, *,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_mul_cpt
01947 ATOMIC_CMPXCHG_CPT( fixed4,  orb_cpt, kmp_int32,  32, |,  0            )  // __kmpc_atomic_fixed4_orb_cpt
01948 ATOMIC_CMPXCHG_CPT( fixed4,  shl_cpt, kmp_int32,  32, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shl_cpt
01949 ATOMIC_CMPXCHG_CPT( fixed4,  shr_cpt, kmp_int32,  32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shr_cpt
01950 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_shr_cpt
01951 ATOMIC_CMPXCHG_CPT( fixed4,  xor_cpt, kmp_int32,  32, ^,  0            )  // __kmpc_atomic_fixed4_xor_cpt
01952 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64,  64, &,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_andb_cpt
01953 ATOMIC_CMPXCHG_CPT( fixed8,  div_cpt, kmp_int64,  64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_div_cpt
01954 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_div_cpt
01955 ATOMIC_CMPXCHG_CPT( fixed8,  mul_cpt, kmp_int64,  64, *,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_mul_cpt
01956 ATOMIC_CMPXCHG_CPT( fixed8,  orb_cpt, kmp_int64,  64, |,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_orb_cpt
01957 ATOMIC_CMPXCHG_CPT( fixed8,  shl_cpt, kmp_int64,  64, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shl_cpt
01958 ATOMIC_CMPXCHG_CPT( fixed8,  shr_cpt, kmp_int64,  64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shr_cpt
01959 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_shr_cpt
01960 ATOMIC_CMPXCHG_CPT( fixed8,  xor_cpt, kmp_int64,  64, ^,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_xor_cpt
01961 ATOMIC_CMPXCHG_CPT( float4,  div_cpt, kmp_real32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_div_cpt
01962 ATOMIC_CMPXCHG_CPT( float4,  mul_cpt, kmp_real32, 32, *,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_mul_cpt
01963 ATOMIC_CMPXCHG_CPT( float8,  div_cpt, kmp_real64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_div_cpt
01964 ATOMIC_CMPXCHG_CPT( float8,  mul_cpt, kmp_real64, 64, *,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_mul_cpt
01965 //              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
01966 
01967 // ------------------------------------------------------------------------
01968 // Routines for C/C++ Reduction operators && and ||
01969 // ------------------------------------------------------------------------
01970 
01971 // -------------------------------------------------------------------------
01972 // Operation on *lhs, rhs bound by critical section
01973 //     OP     - operator (it's supposed to contain an assignment)
01974 //     LCK_ID - lock identifier
01975 // Note: don't check gtid as it should always be valid
01976 // 1, 2-byte - expect valid parameter, other - check before this macro
01977 #define OP_CRITICAL_L_CPT(OP,LCK_ID)                                      \
01978     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );                    \
01979                                                                           \
01980     if( flag ) {                                                          \
01981         new_value OP rhs;                                                 \
01982     } else                                                                \
01983         new_value = (*lhs);                                               \
01984                                                                           \
01985     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
01986 
01987 // ------------------------------------------------------------------------
01988 #ifdef KMP_GOMP_COMPAT
01989 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)                                   \
01990     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
01991         KMP_CHECK_GTID;                                                   \
01992         OP_CRITICAL_L_CPT( OP, 0 );                                       \
01993         return new_value;                                                 \
01994     }
01995 #else
01996 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
01997 #endif /* KMP_GOMP_COMPAT */
01998 
01999 // ------------------------------------------------------------------------
02000 // Need separate macros for &&, || because there is no combined assignment
02001 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)           \
02002 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                 \
02003     TYPE new_value;                                                       \
02004     OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG )                        \
02005     OP_CMPXCHG_CPT(TYPE,BITS,OP)                                          \
02006 }
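
// Note: && and || have no compound assignment form, so the GOMP critical
// path is handed the token sequence "= *lhs OP"; inside OP_CRITICAL_L_CPT
// the line "new_value OP rhs;" then expands to, e.g.,
//     new_value = *lhs && rhs;
// while the lock-free path simply computes old_value OP rhs inside the
// OP_CMPXCHG_CPT retry loop.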
02007 
02008 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char,       8, &&, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_andl_cpt
02009 ATOMIC_CMPX_L_CPT( fixed1,  orl_cpt, char,       8, ||, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_orl_cpt
02010 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short,     16, &&, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_andl_cpt
02011 ATOMIC_CMPX_L_CPT( fixed2,  orl_cpt, short,     16, ||, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_orl_cpt
02012 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 )             // __kmpc_atomic_fixed4_andl_cpt
02013 ATOMIC_CMPX_L_CPT( fixed4,  orl_cpt, kmp_int32, 32, ||, 0 )             // __kmpc_atomic_fixed4_orl_cpt
02014 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_andl_cpt
02015 ATOMIC_CMPX_L_CPT( fixed8,  orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_orl_cpt
02016 
02017 
02018 // -------------------------------------------------------------------------
02019 // Routines for Fortran operators that have no direct C counterpart:
02020 // MAX, MIN, .EQV., .NEQV.
02021 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
02022 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
02023 // -------------------------------------------------------------------------
02024 
02025 // -------------------------------------------------------------------------
02026 // MIN and MAX need separate macros
02027 // OP - operator used to check whether any action is needed
02028 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID)                                    \
02029     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );                     \
02030                                                                            \
02031     if ( *lhs OP rhs ) {                 /* still need actions? */         \
02032         old_value = *lhs;                                                  \
02033         *lhs = rhs;                                                        \
02034         if ( flag )                                                        \
02035             new_value = rhs;                                               \
02036         else                                                               \
02037             new_value = old_value;                                         \
02038     }                                                                      \
02039     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );                     \
02040     return new_value;                                                      \
02041 
02042 // -------------------------------------------------------------------------
02043 #ifdef KMP_GOMP_COMPAT
02044 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)                                 \
02045     if (( FLAG ) && ( __kmp_atomic_mode == 2 )) {                          \
02046         KMP_CHECK_GTID;                                                    \
02047         MIN_MAX_CRITSECT_CPT( OP, 0 );                                     \
02048     }
02049 #else
02050 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
02051 #endif /* KMP_GOMP_COMPAT */
02052 
02053 // -------------------------------------------------------------------------
02054 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP)                                  \
02055     {                                                                      \
02056         TYPE KMP_ATOMIC_VOLATILE temp_val;                                 \
02057         /*TYPE old_value; */                                               \
02058         temp_val = *lhs;                                                   \
02059         old_value = temp_val;                                              \
02060         while ( old_value OP rhs &&          /* still need actions? */     \
02061             ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs,      \
02062                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,          \
02063                       *VOLATILE_CAST(kmp_int##BITS *) &rhs ) )             \
02064         {                                                                  \
02065             KMP_CPU_PAUSE();                                               \
02066             temp_val = *lhs;                                               \
02067             old_value = temp_val;                                          \
02068         }                                                                  \
02069         if( flag )                                                         \
02070             return rhs;                                                    \
02071         else                                                               \
02072             return old_value;                                              \
02073     }
02074 
02075 // -------------------------------------------------------------------------
02076 // 1-byte, 2-byte operands - use critical section
02077 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)       \
02078 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
02079     TYPE new_value, old_value;                                             \
02080     if ( *lhs OP rhs ) {     /* need actions? */                           \
02081         GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG)                            \
02082         MIN_MAX_CRITSECT_CPT(OP,LCK_ID)                                    \
02083     }                                                                      \
02084     return *lhs;                                                           \
02085 }
02086 
02087 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)         \
02088 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
02089     TYPE new_value, old_value;                                             \
02090     if ( *lhs OP rhs ) {                                                   \
02091         GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG)                            \
02092         MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP)                                  \
02093     }                                                                      \
02094     return *lhs;                                                           \
02095 }
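
// Note: the unsynchronized "if ( *lhs OP rhs )" test above is only a fast
// filter; when no update is needed the routine returns *lhs, which in that
// case is both the old and the new value.  For max_cpt the OP token is "<",
// i.e. the stored value is replaced only while it is smaller than rhs.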
02096 
02097 
02098 MIN_MAX_COMPXCHG_CPT( fixed1,  max_cpt, char,        8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
02099 MIN_MAX_COMPXCHG_CPT( fixed1,  min_cpt, char,        8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
02100 MIN_MAX_COMPXCHG_CPT( fixed2,  max_cpt, short,      16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
02101 MIN_MAX_COMPXCHG_CPT( fixed2,  min_cpt, short,      16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
02102 MIN_MAX_COMPXCHG_CPT( fixed4,  max_cpt, kmp_int32,  32, <, 0 )            // __kmpc_atomic_fixed4_max_cpt
02103 MIN_MAX_COMPXCHG_CPT( fixed4,  min_cpt, kmp_int32,  32, >, 0 )            // __kmpc_atomic_fixed4_min_cpt
02104 MIN_MAX_COMPXCHG_CPT( fixed8,  max_cpt, kmp_int64,  64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
02105 MIN_MAX_COMPXCHG_CPT( fixed8,  min_cpt, kmp_int64,  64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
02106 MIN_MAX_COMPXCHG_CPT( float4,  max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
02107 MIN_MAX_COMPXCHG_CPT( float4,  min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
02108 MIN_MAX_COMPXCHG_CPT( float8,  max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
02109 MIN_MAX_COMPXCHG_CPT( float8,  min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
02110 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY,    <, 16r,   1 )     // __kmpc_atomic_float16_max_cpt
02111 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY,    >, 16r,   1 )     // __kmpc_atomic_float16_min_cpt
02112 #if ( KMP_ARCH_X86 )
02113     MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r,  1 )  // __kmpc_atomic_float16_max_a16_cpt
02114     MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r,  1 )  // __kmpc_atomic_float16_min_a16_cpt
02115 #endif
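
The min/max capture routines above return either the pre-update or the post-update value of the target, selected by flag. As an informal illustration only (the runtime itself uses the KMP_COMPARE_AND_STORE_ACQ* macros and the ATOMIC_LOCK* machinery, not C11 atomics), the same max-capture semantics for a 32-bit integer can be sketched like this; max_capture_i32 is a hypothetical name, not a runtime entry point:

    /* Illustrative sketch, not runtime code: atomic max with capture on a
     * 32-bit int. flag != 0 returns the value after the update, flag == 0
     * the value before it. */
    #include <stdatomic.h>
    #include <stdint.h>

    static int32_t max_capture_i32( _Atomic int32_t *lhs, int32_t rhs, int flag )
    {
        int32_t old_value = atomic_load( lhs );
        int32_t new_value;
        do {
            new_value = ( old_value < rhs ) ? rhs : old_value;
        } while ( ! atomic_compare_exchange_weak( lhs, &old_value, new_value ) );
        return flag ? new_value : old_value;
    }
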
02116 
02117 // ------------------------------------------------------------------------
02118 #ifdef KMP_GOMP_COMPAT
02119 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)                                 \
02120     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
02121         KMP_CHECK_GTID;                                                   \
02122         OP_CRITICAL_CPT( OP, 0 );                                         \
02123     }
02124 #else
02125 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
02126 #endif /* KMP_GOMP_COMPAT */
02127 // ------------------------------------------------------------------------
02128 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)         \
02129 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                 \
02130     TYPE new_value;                                                       \
02131     OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG)  /* send assignment */        \
02132     OP_CMPXCHG_CPT(TYPE,BITS,OP)                                          \
02133 }
02134 
02135 // ------------------------------------------------------------------------
02136 
02137 ATOMIC_CMPXCHG_CPT(  fixed1, neqv_cpt, kmp_int8,   8,   ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
02138 ATOMIC_CMPXCHG_CPT(  fixed2, neqv_cpt, kmp_int16, 16,   ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
02139 ATOMIC_CMPXCHG_CPT(  fixed4, neqv_cpt, kmp_int32, 32,   ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
02140 ATOMIC_CMPXCHG_CPT(  fixed8, neqv_cpt, kmp_int64, 64,   ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
02141 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt,  kmp_int8,   8,  ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
02142 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt,  kmp_int16, 16,  ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
02143 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt,  kmp_int32, 32,  ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
02144 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt,  kmp_int64, 64,  ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
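The ^~ operator passed to ATOMIC_CMPX_EQV_CPT expands to the update x ^= ~rhs, the bitwise complement of XOR, which backs the eqv routines (plain ^ backs neqv). A tiny self-check of that identity, included only as an illustration; eqv_identity_check is not part of the runtime:

    /* Illustration of the eqv trick: x ^ ~rhs == ~(x ^ rhs) for any operands
     * (shown for 8-bit values; the casts handle integer promotion). */
    #include <assert.h>
    #include <stdint.h>

    static void eqv_identity_check( void )
    {
        uint8_t x = 0xA5, rhs = 0x0F;
        assert( (uint8_t)( x ^ ~rhs ) == (uint8_t)~( x ^ rhs ) );
    }
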
02145 
02146 // ------------------------------------------------------------------------
02147 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
02148 //     TYPE_ID, OP_ID, TYPE - detailed above
02149 //     OP      - operator
02150 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
02151 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
02152 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                           \
02153     TYPE new_value;                                                 \
02154     OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG)  /* send assignment */       \
02155     OP_CRITICAL_CPT(OP##=,LCK_ID)          /* send assignment */    \
02156 }
02157 
02158 // ------------------------------------------------------------------------
02159 
02160 // Workaround for cmplx4. Regular routines with return value don't work
02161 // on Win_32e. Let's return captured values through the additional parameter.
02162 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID)                                    \
02163     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02164                                                                           \
02165     if( flag ) {                                                          \
02166         (*lhs) OP rhs;                                                    \
02167         (*out) = (*lhs);                                                  \
02168     } else {                                                              \
02169         (*out) = (*lhs);                                                  \
02170         (*lhs) OP rhs;                                                    \
02171     }                                                                     \
02172                                                                           \
02173     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02174     return;
02175 // ------------------------------------------------------------------------
02176 
02177 #ifdef KMP_GOMP_COMPAT
02178 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)                                 \
02179     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
02180         KMP_CHECK_GTID;                                                   \
02181         OP_CRITICAL_CPT_WRK( OP##=, 0 );                                  \
02182     }
02183 #else
02184 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
02185 #endif /* KMP_GOMP_COMPAT */
02186 // ------------------------------------------------------------------------
02187 
02188 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE)                              \
02189 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
02190 {                                                                         \
02191     KMP_DEBUG_ASSERT( __kmp_init_serial );                                \
02192     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
02193 // ------------------------------------------------------------------------
02194 
02195 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)   \
02196 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE)                                      \
02197     OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG)                                \
02198     OP_CRITICAL_CPT_WRK(OP##=,LCK_ID)                                     \
02199 }
02200 // The end of workaround for cmplx4
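
Because the 8-byte complex result cannot be returned by value reliably on Win_32e, the _WRK variants hand the captured value back through out and return void; flag selects whether out receives the value after or before the update. A rough sketch of that semantics in portable C, with kmp_cmplx32 modelled here as a plain struct of two floats and a pthread mutex standing in for the runtime's atomic lock (both are assumptions for illustration only):

    #include <pthread.h>

    typedef struct { float re, im; } cmplx32_t;                /* stand-in for kmp_cmplx32 */
    static pthread_mutex_t cmplx_lock = PTHREAD_MUTEX_INITIALIZER;

    static void cmplx4_add_capture( cmplx32_t *lhs, cmplx32_t rhs,
                                    cmplx32_t *out, int flag )
    {
        pthread_mutex_lock( &cmplx_lock );
        if ( flag ) {                      /* capture the value after the update */
            lhs->re += rhs.re; lhs->im += rhs.im;
            *out = *lhs;
        } else {                           /* capture the value before the update */
            *out = *lhs;
            lhs->re += rhs.re; lhs->im += rhs.im;
        }
        pthread_mutex_unlock( &cmplx_lock );
    }
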
02201 
02202 /* ------------------------------------------------------------------------- */
02203 // routines for long double type
02204 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double,     +, 10r,   1 )            // __kmpc_atomic_float10_add_cpt
02205 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub_cpt
02206 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double,     *, 10r,   1 )            // __kmpc_atomic_float10_mul_cpt
02207 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div_cpt
02208 // routines for _Quad type
02209 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY,     +, 16r,   1 )            // __kmpc_atomic_float16_add_cpt
02210 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub_cpt
02211 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY,     *, 16r,   1 )            // __kmpc_atomic_float16_mul_cpt
02212 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div_cpt
02213 #if ( KMP_ARCH_X86 )
02214     ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r,  1 )          // __kmpc_atomic_float16_add_a16_cpt
02215     ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r,  1 )          // __kmpc_atomic_float16_sub_a16_cpt
02216     ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r,  1 )          // __kmpc_atomic_float16_mul_a16_cpt
02217     ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r,  1 )          // __kmpc_atomic_float16_div_a16_cpt
02218 #endif
02219 
02220 // routines for complex types
02221 
02222 // cmplx4 routines to return void
02223 ATOMIC_CRITICAL_CPT_WRK( cmplx4,  add_cpt, kmp_cmplx32, +, 8c,    1 )            // __kmpc_atomic_cmplx4_add_cpt
02224 ATOMIC_CRITICAL_CPT_WRK( cmplx4,  sub_cpt, kmp_cmplx32, -, 8c,    1 )            // __kmpc_atomic_cmplx4_sub_cpt
02225 ATOMIC_CRITICAL_CPT_WRK( cmplx4,  mul_cpt, kmp_cmplx32, *, 8c,    1 )            // __kmpc_atomic_cmplx4_mul_cpt
02226 ATOMIC_CRITICAL_CPT_WRK( cmplx4,  div_cpt, kmp_cmplx32, /, 8c,    1 )            // __kmpc_atomic_cmplx4_div_cpt
02227 
02228 ATOMIC_CRITICAL_CPT( cmplx8,  add_cpt, kmp_cmplx64, +, 16c,   1 )            // __kmpc_atomic_cmplx8_add_cpt
02229 ATOMIC_CRITICAL_CPT( cmplx8,  sub_cpt, kmp_cmplx64, -, 16c,   1 )            // __kmpc_atomic_cmplx8_sub_cpt
02230 ATOMIC_CRITICAL_CPT( cmplx8,  mul_cpt, kmp_cmplx64, *, 16c,   1 )            // __kmpc_atomic_cmplx8_mul_cpt
02231 ATOMIC_CRITICAL_CPT( cmplx8,  div_cpt, kmp_cmplx64, /, 16c,   1 )            // __kmpc_atomic_cmplx8_div_cpt
02232 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c,   1 )            // __kmpc_atomic_cmplx10_add_cpt
02233 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub_cpt
02234 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c,   1 )            // __kmpc_atomic_cmplx10_mul_cpt
02235 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c,   1 )            // __kmpc_atomic_cmplx10_div_cpt
02236 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c,   1 )            // __kmpc_atomic_cmplx16_add_cpt
02237 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub_cpt
02238 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c,   1 )            // __kmpc_atomic_cmplx16_mul_cpt
02239 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c,   1 )            // __kmpc_atomic_cmplx16_div_cpt
02240 #if ( KMP_ARCH_X86 )
02241     ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,   1 )   // __kmpc_atomic_cmplx16_add_a16_cpt
02242     ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,   1 )   // __kmpc_atomic_cmplx16_sub_a16_cpt
02243     ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,   1 )   // __kmpc_atomic_cmplx16_mul_a16_cpt
02244     ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,   1 )   // __kmpc_atomic_cmplx16_div_a16_cpt
02245 #endif
02246 
02247 #if OMP_40_ENABLED
02248 
02249 // OpenMP 4.0: reverse capture, i.e. { v = x; x = expr binop x; } or { x = expr binop x; v = x; }, for non-commutative operations.
02250 // Supported only on IA-32 architecture and Intel(R) 64
02251 
02252 // -------------------------------------------------------------------------
02253 // Operation on *lhs, rhs bound by critical section
02254 //     OP     - operator (it's supposed to contain an assignment)
02255 //     LCK_ID - lock identifier
02256 // Note: don't check gtid as it should always be valid
02257 // 1- and 2-byte operands: a valid gtid is expected here; other sizes are checked before this macro
02258 #define OP_CRITICAL_CPT_REV(OP,LCK_ID)                                    \
02259     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02260                                                                           \
02261     if( flag ) {                                                          \
02262         /* temp_val = (*lhs); */                                          \
02263         (*lhs) = (rhs) OP (*lhs);                                         \
02264         new_value = (*lhs);                                               \
02265     } else {                                                              \
02266         new_value = (*lhs);                                               \
02267         (*lhs) = (rhs) OP (*lhs);                                         \
02268     }                                                                     \
02269     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02270     return new_value;
02271 
02272 // ------------------------------------------------------------------------
02273 #ifdef KMP_GOMP_COMPAT
02274 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)                                 \
02275     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
02276         KMP_CHECK_GTID;                                                   \
02277         OP_CRITICAL_CPT_REV( OP, 0 );                                     \
02278     }
02279 #else
02280 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
02281 #endif /* KMP_GOMP_COMPAT */
02282 
02283 // ------------------------------------------------------------------------
02284 // Operation on *lhs, rhs using "compare_and_store" routine
02285 //     TYPE    - operands' type
02286 //     BITS    - size in bits, used to distinguish low level calls
02287 //     OP      - operator
02288 // Note: temp_val introduced in order to force the compiler to read
02289 //       *lhs only once (w/o it the compiler reads *lhs twice)
02290 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP)                                  \
02291     {                                                                     \
02292         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
02293         TYPE old_value, new_value;                                        \
02294         temp_val = *lhs;                                                  \
02295         old_value = temp_val;                                             \
02296         new_value = rhs OP old_value;                                     \
02297         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
02298                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
02299                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
02300         {                                                                 \
02301             KMP_CPU_PAUSE();                                              \
02302                                                                           \
02303             temp_val = *lhs;                                              \
02304             old_value = temp_val;                                         \
02305             new_value = rhs OP old_value;                                 \
02306         }                                                                 \
02307         if( flag ) {                                                      \
02308             return new_value;                                             \
02309         } else                                                            \
02310             return old_value;                                             \
02311     }
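
In the reverse forms the update is x = rhs OP x rather than x = x OP rhs, which only matters for the non-commutative operators (-, /, <<, >>). A minimal sketch of the same lock-free retry pattern with C11 atomics, assuming reversed subtraction on a 32-bit int; sub_capture_rev_i32 is a hypothetical name used only for this illustration:

    #include <stdatomic.h>
    #include <stdint.h>

    static int32_t sub_capture_rev_i32( _Atomic int32_t *lhs, int32_t rhs, int flag )
    {
        int32_t old_value = atomic_load( lhs );      /* read *lhs once per attempt */
        int32_t new_value;
        do {
            new_value = rhs - old_value;             /* x = rhs - x, not x - rhs */
        } while ( ! atomic_compare_exchange_weak( lhs, &old_value, new_value ) );
        return flag ? new_value : old_value;
    }
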
02312 
02313 // -------------------------------------------------------------------------
02314 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)       \
02315 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
02316     TYPE new_value;                                                        \
02317     TYPE KMP_ATOMIC_VOLATILE temp_val;                                     \
02318     OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG)                                 \
02319     OP_CMPXCHG_CPT_REV(TYPE,BITS,OP)                                       \
02320 }
02321 
02322 
02323 ATOMIC_CMPXCHG_CPT_REV( fixed1,  div_cpt_rev, kmp_int8,    8, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_div_cpt_rev
02324 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8,   8, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_div_cpt_rev
02325 ATOMIC_CMPXCHG_CPT_REV( fixed1,  shl_cpt_rev, kmp_int8,    8, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shl_cpt_rev
02326 ATOMIC_CMPXCHG_CPT_REV( fixed1,  shr_cpt_rev, kmp_int8,    8, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shr_cpt_rev
02327 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8,   8, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_shr_cpt_rev
02328 ATOMIC_CMPXCHG_CPT_REV( fixed1,  sub_cpt_rev, kmp_int8,    8, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_sub_cpt_rev
02329 ATOMIC_CMPXCHG_CPT_REV( fixed2,  div_cpt_rev, kmp_int16,  16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_div_cpt_rev
02330 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_div_cpt_rev
02331 ATOMIC_CMPXCHG_CPT_REV( fixed2,  shl_cpt_rev, kmp_int16,  16, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shl_cpt_rev
02332 ATOMIC_CMPXCHG_CPT_REV( fixed2,  shr_cpt_rev, kmp_int16,  16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shr_cpt_rev
02333 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_shr_cpt_rev
02334 ATOMIC_CMPXCHG_CPT_REV( fixed2,  sub_cpt_rev, kmp_int16,  16, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_sub_cpt_rev
02335 ATOMIC_CMPXCHG_CPT_REV( fixed4,  div_cpt_rev, kmp_int32,  32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_div_cpt_rev
02336 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_div_cpt_rev
02337 ATOMIC_CMPXCHG_CPT_REV( fixed4,  shl_cpt_rev, kmp_int32,  32, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shl_cpt_rev
02338 ATOMIC_CMPXCHG_CPT_REV( fixed4,  shr_cpt_rev, kmp_int32,  32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shr_cpt_rev
02339 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_shr_cpt_rev
02340 ATOMIC_CMPXCHG_CPT_REV( fixed4,  sub_cpt_rev, kmp_int32,  32, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_sub_cpt_rev
02341 ATOMIC_CMPXCHG_CPT_REV( fixed8,  div_cpt_rev, kmp_int64,  64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_div_cpt_rev
02342 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_div_cpt_rev
02343 ATOMIC_CMPXCHG_CPT_REV( fixed8,  shl_cpt_rev, kmp_int64,  64, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shl_cpt_rev
02344 ATOMIC_CMPXCHG_CPT_REV( fixed8,  shr_cpt_rev, kmp_int64,  64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shr_cpt_rev
02345 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_shr_cpt_rev
02346 ATOMIC_CMPXCHG_CPT_REV( fixed8,  sub_cpt_rev, kmp_int64,  64, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_sub_cpt_rev
02347 ATOMIC_CMPXCHG_CPT_REV( float4,  div_cpt_rev, kmp_real32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_div_cpt_rev
02348 ATOMIC_CMPXCHG_CPT_REV( float4,  sub_cpt_rev, kmp_real32, 32, -,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub_cpt_rev
02349 ATOMIC_CMPXCHG_CPT_REV( float8,  div_cpt_rev, kmp_real64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_div_cpt_rev
02350 ATOMIC_CMPXCHG_CPT_REV( float8,  sub_cpt_rev, kmp_real64, 64, -,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub_cpt_rev
02351 //              TYPE_ID,OP_ID, TYPE,          OP,  GOMP_FLAG
02352 
02353 
02354 // ------------------------------------------------------------------------
02355 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
02356 //     TYPE_ID, OP_ID, TYPE - detailed above
02357 //     OP      - operator
02358 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
02359 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
02360 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                               \
02361     TYPE new_value;                                                     \
02362     TYPE KMP_ATOMIC_VOLATILE temp_val;                                  \
02363     /* printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode); */        \
02364     OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG)                              \
02365     OP_CRITICAL_CPT_REV(OP,LCK_ID)                                      \
02366 }
02367 
02368 
02369 /* ------------------------------------------------------------------------- */
02370 // routines for long double type
02371 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub_cpt_rev
02372 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div_cpt_rev
02373 // routines for _Quad type
02374 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub_cpt_rev
02375 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div_cpt_rev
02376 #if ( KMP_ARCH_X86 )
02377     ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,  1 )          // __kmpc_atomic_float16_sub_a16_cpt_rev
02378     ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,  1 )          // __kmpc_atomic_float16_div_a16_cpt_rev
02379 #endif
02380 
02381 // routines for complex types
02382 
02383 // ------------------------------------------------------------------------
02384 
02385 // Workaround for cmplx4. Regular routines with return value don't work
02386 // on Win_32e. Let's return captured values through the additional parameter.
02387 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID)                                \
02388     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02389                                                                           \
02390     if( flag ) {                                                          \
02391         (*lhs) = (rhs) OP (*lhs);                                         \
02392         (*out) = (*lhs);                                                  \
02393     } else {                                                              \
02394         (*out) = (*lhs);                                                  \
02395         (*lhs) = (rhs) OP (*lhs);                                         \
02396     }                                                                     \
02397                                                                           \
02398     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02399     return;
02400 // ------------------------------------------------------------------------
02401 
02402 #ifdef KMP_GOMP_COMPAT
02403 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)                             \
02404     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
02405         KMP_CHECK_GTID;                                                   \
02406         OP_CRITICAL_CPT_REV_WRK( OP, 0 );                                 \
02407     }
02408 #else
02409 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
02410 #endif /* KMP_GOMP_COMPAT */
02411 // ------------------------------------------------------------------------
02412 
02413 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)   \
02414 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE)                                          \
02415     OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG)                                \
02416     OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID)                                        \
02417 }
02418 // The end of workaround for cmplx4
02419 
02420 
02421 // !!! TODO: check if we need to return void for cmplx4 routines
02422 // cmplx4 routines to return void
02423 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4,  sub_cpt_rev, kmp_cmplx32, -, 8c,    1 )            // __kmpc_atomic_cmplx4_sub_cpt_rev
02424 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4,  div_cpt_rev, kmp_cmplx32, /, 8c,    1 )            // __kmpc_atomic_cmplx4_div_cpt_rev
02425 
02426 ATOMIC_CRITICAL_CPT_REV( cmplx8,  sub_cpt_rev, kmp_cmplx64, -, 16c,   1 )            // __kmpc_atomic_cmplx8_sub_cpt_rev
02427 ATOMIC_CRITICAL_CPT_REV( cmplx8,  div_cpt_rev, kmp_cmplx64, /, 16c,   1 )            // __kmpc_atomic_cmplx8_div_cpt_rev
02428 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub_cpt_rev
02429 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,   1 )            // __kmpc_atomic_cmplx10_div_cpt_rev
02430 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub_cpt_rev
02431 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,   1 )            // __kmpc_atomic_cmplx16_div_cpt_rev
02432 #if ( KMP_ARCH_X86 )
02433     ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,   1 )   // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
02434     ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,   1 )   // __kmpc_atomic_cmplx16_div_a16_cpt_rev
02435 #endif
02436 
02437 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
02438 
02439 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                                    \
02440 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs )     \
02441 {                                                                                         \
02442     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
02443     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
02444 
02445 #define CRITICAL_SWP(LCK_ID)                                              \
02446     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02447                                                                           \
02448     old_value = (*lhs);                                                   \
02449     (*lhs) = rhs;                                                         \
02450                                                                           \
02451     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02452     return old_value;
02453 
02454 // ------------------------------------------------------------------------
02455 #ifdef KMP_GOMP_COMPAT
02456 #define GOMP_CRITICAL_SWP(FLAG)                                           \
02457     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
02458         KMP_CHECK_GTID;                                                   \
02459         CRITICAL_SWP( 0 );                                                \
02460     }
02461 #else
02462 #define GOMP_CRITICAL_SWP(FLAG)
02463 #endif /* KMP_GOMP_COMPAT */
02464 
02465 
02466 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG)                      \
02467 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
02468     TYPE old_value;                                                       \
02469     GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
02470     old_value = __kmp_xchg_fixed##BITS( lhs, rhs );                       \
02471     return old_value;                                                     \
02472 }
02473 // ------------------------------------------------------------------------
02474 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG)                \
02475 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
02476     TYPE old_value;                                                       \
02477     GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
02478     old_value = __kmp_xchg_real##BITS( lhs, rhs );                        \
02479     return old_value;                                                     \
02480 }
02481 
02482 // ------------------------------------------------------------------------
02483 #define CMPXCHG_SWP(TYPE,BITS)                                            \
02484     {                                                                     \
02485         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
02486         TYPE old_value, new_value;                                        \
02487         temp_val = *lhs;                                                  \
02488         old_value = temp_val;                                             \
02489         new_value = rhs;                                                  \
02490         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
02491                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
02492                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
02493         {                                                                 \
02494             KMP_CPU_PAUSE();                                              \
02495                                                                           \
02496             temp_val = *lhs;                                              \
02497             old_value = temp_val;                                         \
02498             new_value = rhs;                                              \
02499         }                                                                 \
02500         return old_value;                                                 \
02501     }
02502 
02503 // -------------------------------------------------------------------------
02504 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG)                   \
02505 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
02506     TYPE old_value;                                                       \
02507     GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
02508     CMPXCHG_SWP(TYPE,BITS)                                                \
02509 }
02510 
02511 ATOMIC_XCHG_SWP( fixed1, kmp_int8,    8, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_swp
02512 ATOMIC_XCHG_SWP( fixed2, kmp_int16,  16, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_swp
02513 ATOMIC_XCHG_SWP( fixed4, kmp_int32,  32, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_swp
02514 
02515 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 )      // __kmpc_atomic_float4_swp
02516 
02517 #if ( KMP_ARCH_X86 )
02518     ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 )      // __kmpc_atomic_fixed8_swp
02519     ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 )     // __kmpc_atomic_float8_swp
02520 #else
02521     ATOMIC_XCHG_SWP(       fixed8, kmp_int64, 64, KMP_ARCH_X86 )   // __kmpc_atomic_fixed8_swp
02522     ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 )  // __kmpc_atomic_float8_swp
02523 #endif
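
The capture-write (swap) routines implement { v = x; x = expr; }, which is simply an atomic exchange; the runtime picks __kmp_xchg_fixed*/__kmp_xchg_real* or a compare-and-store loop depending on the type and architecture. As a sketch only, the equivalent operation expressed with C11 atomics for a 64-bit integer (swap_capture_i64 is an illustrative name, not a runtime routine):

    #include <stdatomic.h>
    #include <stdint.h>

    static int64_t swap_capture_i64( _Atomic int64_t *lhs, int64_t rhs )
    {
        return atomic_exchange( lhs, rhs );   /* the old value of *lhs goes back to the caller */
    }
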
02524 
02525 // ------------------------------------------------------------------------
02526 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
02527 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG)              \
02528 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                          \
02529     TYPE old_value;                                                     \
02530     GOMP_CRITICAL_SWP(GOMP_FLAG)                                        \
02531     CRITICAL_SWP(LCK_ID)                                                \
02532 }
02533 
02534 // ------------------------------------------------------------------------
02535 
02536 // !!! TODO: check if we need to return void for cmplx4 routines
02537 // Workaround for cmplx4. Regular routines with return value don't work
02538 // on Win_32e. Let's return captured values through the additional parameter.
02539 
02540 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE)                                                \
02541 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out )     \
02542 {                                                                                         \
02543     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
02544     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
02545 
02546 
02547 #define CRITICAL_SWP_WRK(LCK_ID)                                          \
02548     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02549                                                                           \
02550     tmp = (*lhs);                                                         \
02551     (*lhs) = (rhs);                                                       \
02552     (*out) = tmp;                                                         \
02553     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
02554     return;
02555 
02556 // ------------------------------------------------------------------------
02557 
02558 #ifdef KMP_GOMP_COMPAT
02559 #define GOMP_CRITICAL_SWP_WRK(FLAG)                                       \
02560     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
02561         KMP_CHECK_GTID;                                                   \
02562         CRITICAL_SWP_WRK( 0 );                                            \
02563     }
02564 #else
02565 #define GOMP_CRITICAL_SWP_WRK(FLAG)
02566 #endif /* KMP_GOMP_COMPAT */
02567 // ------------------------------------------------------------------------
02568 
02569 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG)           \
02570 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE)                                        \
02571     TYPE tmp;                                                             \
02572     GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                      \
02573     CRITICAL_SWP_WRK(LCK_ID)                                              \
02574 }
02575 // The end of workaround for cmplx4
02576 
02577 
02578 ATOMIC_CRITICAL_SWP( float10, long double, 10r,   1 )              // __kmpc_atomic_float10_swp
02579 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r,   1 )              // __kmpc_atomic_float16_swp
02580 // cmplx4 routine to return void
02581 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32,  8c,   1 )           // __kmpc_atomic_cmplx4_swp
02582 
02583 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           // __kmpc_atomic_cmplx4_swp
02584 
02585 
02586 ATOMIC_CRITICAL_SWP( cmplx8,  kmp_cmplx64, 16c,   1 )              // __kmpc_atomic_cmplx8_swp
02587 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c,   1 )              // __kmpc_atomic_cmplx10_swp
02588 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c,   1 )              // __kmpc_atomic_cmplx16_swp
02589 #if ( KMP_ARCH_X86 )
02590     ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t,         16r, 1 )  // __kmpc_atomic_float16_a16_swp
02591     ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 )  // __kmpc_atomic_cmplx16_a16_swp
02592 #endif
02593 
02594 
02595 // End of OpenMP 4.0 Capture
02596 
02597 #endif //OMP_40_ENABLED
02598 
02599 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
02600 
02601 
02602 #undef OP_CRITICAL
02603 
02604 /* ------------------------------------------------------------------------ */
02605 /* Generic atomic routines                                                  */
02606 /* ------------------------------------------------------------------------ */
02607 
02608 void
02609 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
02610 {
02611     KMP_DEBUG_ASSERT( __kmp_init_serial );
02612 
02613     if (
02614 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
02615         FALSE                                   /* must use lock */
02616 #else
02617         TRUE
02618 #endif
02619     )
02620     {
02621     kmp_int8 old_value, new_value;
02622 
02623     old_value = *(kmp_int8 *) lhs;
02624     (*f)( &new_value, &old_value, rhs );
02625 
02626     /* TODO: Should this be acquire or release? */
02627     while ( !  KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
02628                     *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
02629     {
02630         KMP_CPU_PAUSE();
02631 
02632         old_value = *(kmp_int8 *) lhs;
02633         (*f)( &new_value, &old_value, rhs );
02634     }
02635 
02636     return;
02637     }
02638     else {
02639         //
02640         // All 1-byte data is of integer data type.
02641         //
02642 
02643 #ifdef KMP_GOMP_COMPAT
02644         if ( __kmp_atomic_mode == 2 ) {
02645             __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
02646         }
02647         else
02648 #endif /* KMP_GOMP_COMPAT */
02649     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
02650 
02651     (*f)( lhs, lhs, rhs );
02652 
02653 #ifdef KMP_GOMP_COMPAT
02654         if ( __kmp_atomic_mode == 2 ) {
02655             __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
02656         }
02657         else
02658 #endif /* KMP_GOMP_COMPAT */
02659     __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
02660     }
02661 }
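
The generic entry points receive the operation as a callback f(out, a, b) working on plain storage: the lock-free path calls (*f)( &new_value, &old_value, rhs ) and retries with compare-and-store, while the lock path calls (*f)( lhs, lhs, rhs ) under the appropriate lock. A hypothetical callback of that shape, shown only to illustrate the expected convention for the 1-byte case (or_op_1byte is not part of the runtime):

    /* Hypothetical callback: computes a bitwise OR of two 1-byte operands and
     * writes the result through 'out'. The runtime never interprets the data;
     * it only guarantees that the update as a whole is performed atomically. */
    static void or_op_1byte( void *out, void *a, void *b )
    {
        *(unsigned char *)out = *(unsigned char *)a | *(unsigned char *)b;
    }

A compiler would then pass the address of the target, the address of the right-hand side, and or_op_1byte to __kmpc_atomic_1 together with the usual ident_t pointer and gtid.
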
02662 
02663 void
02664 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
02665 {
02666     if (
02667 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
02668         FALSE                                   /* must use lock */
02669 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
02670     TRUE                    /* no alignment problems */
02671 #else
02672     ! ( (kmp_uintptr_t) lhs & 0x1)      /* make sure address is 2-byte aligned */
02673 #endif
02674     )
02675     {
02676     kmp_int16 old_value, new_value;
02677 
02678     old_value = *(kmp_int16 *) lhs;
02679     (*f)( &new_value, &old_value, rhs );
02680 
02681     /* TODO: Should this be acquire or release? */
02682     while ( !  KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
02683                     *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
02684     {
02685         KMP_CPU_PAUSE();
02686 
02687         old_value = *(kmp_int16 *) lhs;
02688         (*f)( &new_value, &old_value, rhs );
02689     }
02690 
02691     return;
02692     }
02693     else {
02694         //
02695         // All 2-byte data is of integer data type.
02696         //
02697 
02698 #ifdef KMP_GOMP_COMPAT
02699         if ( __kmp_atomic_mode == 2 ) {
02700             __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
02701         }
02702         else
02703 #endif /* KMP_GOMP_COMPAT */
02704     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
02705 
02706     (*f)( lhs, lhs, rhs );
02707 
02708 #ifdef KMP_GOMP_COMPAT
02709         if ( __kmp_atomic_mode == 2 ) {
02710             __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
02711         }
02712         else
02713 #endif /* KMP_GOMP_COMPAT */
02714     __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
02715     }
02716 }
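
On architectures other than IA-32 and Intel(R) 64, the 2-, 4- and 8-byte entry points take the lock-free path only when the target address is suitably aligned; the tests above just check the low bits of the pointer. For illustration only (is_aligned is a hypothetical helper, not runtime code):

    #include <stdint.h>

    /* An address is n-byte aligned when its low log2(n) bits are zero;
     * n must be a power of two. */
    static int is_aligned( const void *p, uintptr_t n )
    {
        return ( (uintptr_t)p & ( n - 1 ) ) == 0;
    }
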
02717 
02718 void
02719 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
02720 {
02721     KMP_DEBUG_ASSERT( __kmp_init_serial );
02722 
02723     if (
02724         //
02725         // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
02726         // Gomp compatibility is broken if this routine is called for floats.
02727         //
02728 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
02729     TRUE                    /* no alignment problems */
02730 #else
02731     ! ( (kmp_uintptr_t) lhs & 0x3)      /* make sure address is 4-byte aligned */
02732 #endif
02733     )
02734     {
02735     kmp_int32 old_value, new_value;
02736 
02737     old_value = *(kmp_int32 *) lhs;
02738     (*f)( &new_value, &old_value, rhs );
02739 
02740     /* TODO: Should this be acquire or release? */
02741     while ( !  KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
02742                     *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
02743     {
02744         KMP_CPU_PAUSE();
02745 
02746         old_value = *(kmp_int32 *) lhs;
02747         (*f)( &new_value, &old_value, rhs );
02748     }
02749 
02750     return;
02751     }
02752     else {
02753         //
02754         // Use __kmp_atomic_lock_4i for all 4-byte data,
02755         // even if it isn't of integer data type.
02756         //
02757 
02758 #ifdef KMP_GOMP_COMPAT
02759         if ( __kmp_atomic_mode == 2 ) {
02760             __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
02761         }
02762         else
02763 #endif /* KMP_GOMP_COMPAT */
02764     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
02765 
02766     (*f)( lhs, lhs, rhs );
02767 
02768 #ifdef KMP_GOMP_COMPAT
02769         if ( __kmp_atomic_mode == 2 ) {
02770             __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
02771         }
02772         else
02773 #endif /* KMP_GOMP_COMPAT */
02774     __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
02775     }
02776 }
02777 
02778 void
02779 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
02780 {
02781     KMP_DEBUG_ASSERT( __kmp_init_serial );
02782     if (
02783 
02784 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
02785         FALSE                                   /* must use lock */
02786 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
02787     TRUE                    /* no alignment problems */
02788 #else
02789     ! ( (kmp_uintptr_t) lhs & 0x7)      /* make sure address is 8-byte aligned */
02790 #endif
02791     )
02792     {
02793     kmp_int64 old_value, new_value;
02794 
02795     old_value = *(kmp_int64 *) lhs;
02796     (*f)( &new_value, &old_value, rhs );
02797     /* TODO: Should this be acquire or release? */
02798     while ( !  KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
02799                            *(kmp_int64 *) &old_value,
02800                            *(kmp_int64 *) &new_value ) )
02801     {
02802         KMP_CPU_PAUSE();
02803 
02804         old_value = *(kmp_int64 *) lhs;
02805         (*f)( &new_value, &old_value, rhs );
02806     }
02807 
02808     return;
02809     } else {
02810         //
02811         // Use __kmp_atomic_lock_8i for all 8-byte data,
02812         // even if it isn't of integer data type.
02813         //
02814 
02815 #ifdef KMP_GOMP_COMPAT
02816         if ( __kmp_atomic_mode == 2 ) {
02817             __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
02818         }
02819         else
02820 #endif /* KMP_GOMP_COMPAT */
02821     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
02822 
02823     (*f)( lhs, lhs, rhs );
02824 
02825 #ifdef KMP_GOMP_COMPAT
02826         if ( __kmp_atomic_mode == 2 ) {
02827             __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
02828         }
02829         else
02830 #endif /* KMP_GOMP_COMPAT */
02831     __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
02832     }
02833 }
02834 
02835 void
02836 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
02837 {
02838     KMP_DEBUG_ASSERT( __kmp_init_serial );
02839 
02840 #ifdef KMP_GOMP_COMPAT
02841     if ( __kmp_atomic_mode == 2 ) {
02842         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
02843     }
02844     else
02845 #endif /* KMP_GOMP_COMPAT */
02846     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
02847 
02848     (*f)( lhs, lhs, rhs );
02849 
02850 #ifdef KMP_GOMP_COMPAT
02851     if ( __kmp_atomic_mode == 2 ) {
02852         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
02853     }
02854     else
02855 #endif /* KMP_GOMP_COMPAT */
02856     __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
02857 }
02858 
02859 void
02860 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
02861 {
02862     KMP_DEBUG_ASSERT( __kmp_init_serial );
02863 
02864 #ifdef KMP_GOMP_COMPAT
02865     if ( __kmp_atomic_mode == 2 ) {
02866         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
02867     }
02868     else
02869 #endif /* KMP_GOMP_COMPAT */
02870     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
02871 
02872     (*f)( lhs, lhs, rhs );
02873 
02874 #ifdef KMP_GOMP_COMPAT
02875     if ( __kmp_atomic_mode == 2 ) {
02876         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
02877     }
02878     else
02879 #endif /* KMP_GOMP_COMPAT */
02880     __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
02881 }
02882 
02883 void
02884 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
02885 {
02886     KMP_DEBUG_ASSERT( __kmp_init_serial );
02887 
02888 #ifdef KMP_GOMP_COMPAT
02889     if ( __kmp_atomic_mode == 2 ) {
02890         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
02891     }
02892     else
02893 #endif /* KMP_GOMP_COMPAT */
02894     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
02895 
02896     (*f)( lhs, lhs, rhs );
02897 
02898 #ifdef KMP_GOMP_COMPAT
02899     if ( __kmp_atomic_mode == 2 ) {
02900         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
02901     }
02902     else
02903 #endif /* KMP_GOMP_COMPAT */
02904     __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
02905 }
02906 
02907 void
02908 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
02909 {
02910     KMP_DEBUG_ASSERT( __kmp_init_serial );
02911 
02912 #ifdef KMP_GOMP_COMPAT
02913     if ( __kmp_atomic_mode == 2 ) {
02914         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
02915     }
02916     else
02917 #endif /* KMP_GOMP_COMPAT */
02918     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
02919 
02920     (*f)( lhs, lhs, rhs );
02921 
02922 #ifdef KMP_GOMP_COMPAT
02923     if ( __kmp_atomic_mode == 2 ) {
02924         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
02925     }
02926     else
02927 #endif /* KMP_GOMP_COMPAT */
02928     __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
02929 }
02930 
02931 // AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler
02932 //     duplicated in order to avoid using third-party names in pure Intel code
02933 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
02934 void
02935 __kmpc_atomic_start(void)
02936 {
02937     int gtid = __kmp_entry_gtid();
02938     KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
02939     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
02940 }
02941 
02942 
02943 void
02944 __kmpc_atomic_end(void)
02945 {
02946     int gtid = __kmp_get_gtid();
02947     KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
02948     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
02949 }
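
__kmpc_atomic_start and __kmpc_atomic_end simply take and release the global atomic lock, mirroring GOMP_atomic_start/end. A compiler can bracket any update it has no specialized routine for between the two calls; an illustrative, hand-written use (the declarations match the definitions above, the surrounding function is hypothetical):

    extern void __kmpc_atomic_start( void );
    extern void __kmpc_atomic_end( void );

    static void update_under_global_lock( long double *x, long double expr )
    {
        __kmpc_atomic_start();
        *x = expr / *x;            /* any update lacking a specialized routine */
        __kmpc_atomic_end();
    }
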
02950 
02951 /* ------------------------------------------------------------------------ */
02952 /* ------------------------------------------------------------------------ */
02957 // end of file