Intel® OpenMP* Runtime Library
 All Classes Functions Variables Typedefs Enumerations Enumerator Groups Pages
kmp_atomic.c
1 /*
2  * kmp_atomic.c -- ATOMIC implementation routines
3  * $Revision: 42263 $
4  * $Date: 2013-04-04 11:03:19 -0500 (Thu, 04 Apr 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp_atomic.h"
38 #include "kmp.h" // TRUE, asm routines prototypes
39 
// Short aliases for the unsigned char and unsigned short types.
40 typedef unsigned char uchar;
41 typedef unsigned short ushort;
42 
560 /*
561  * Global vars
562  */
563 
// Atomic mode selector. It is initialized to 1 unless the library is built
// with KMP_GOMP_COMPAT defined, in which case it is initialized to 2.
// OP_GOMP_CRITICAL and GOMP_MIN_MAX_CRITSECT test for the value 2 to decide
// whether an update has to go through the generic lock __kmp_atomic_lock.
564 #ifndef KMP_GOMP_COMPAT
565 int __kmp_atomic_mode = 1; // Intel perf
566 #else
567 int __kmp_atomic_mode = 2; // GOMP compatibility
568 #endif /* KMP_GOMP_COMPAT */
569 
// Locks used by the critical-section paths of the atomic routines: one generic
// lock plus one lock per operand size/type. The ATOMIC_LOCK* macros map a
// LCK_ID suffix (0, 1i, 2i, ...) onto these variables.
// NOTE(review): KMP_ALIGN(128) is written once, ahead of the first declaration
// only - confirm whether the remaining locks are meant to be aligned as well.
570 KMP_ALIGN(128)
571 
572 kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
573 kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
574 kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
575 kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
576 kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
577 kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
578 kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
579 kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for float complex data type */
580 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
581 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
582 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
583 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
584 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
585 
586 
587 /*
588  2007-03-02:
589  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
590  bug on *_32 and *_32e. This is just a temporary workaround for the problem.
591  It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
592  routines in assembly language.
593 */
// Qualifier applied to the temp_val local declared inside OP_CMPXCHG and
// MIN_MAX_CMPXCHG.
594 #define KMP_ATOMIC_VOLATILE volatile
595 
// Operator overloads for Quad_a4_t, Quad_a16_t, kmp_cmplx128_a4_t and
// kmp_cmplx128_a16_t, compiled only when KMP_ARCH_X86 is set. Every overload
// simply forwards to the member q of both operands; presumably this lets the
// generic macros in this file apply "OP=" and the </> comparisons to these
// wrapper types directly.
// NOTE(review): these are C++ operator overloads with reference parameters, so
// this file has to be built as C++ despite its .c extension.
596 #if ( KMP_ARCH_X86 )
597 
598  static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
599  static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
600  static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
601  static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
602  static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
603  static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
604 
605  static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
606  static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
607  static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
608  static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
609  static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
610  static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
611 
612  static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
613  static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
614  static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
615  static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
616 
617  static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
618  static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
619  static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
620  static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
621 
622 #endif
623 
624 /* ------------------------------------------------------------------------ */
625 /* ATOMIC implementation routines */
626 /* one routine for each operation and operand type */
627 /* ------------------------------------------------------------------------ */
628 
629 // All routine declarations look like
630 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
631 // ------------------------------------------------------------------------
632 
// Resolves gtid on demand: when it equals KMP_GTID_UNKNOWN it is replaced by
// the value returned from __kmp_entry_gtid(). The expansion reads and writes a
// variable named gtid, so one must be in scope where the macro is used.
633 #define KMP_CHECK_GTID \
634  if ( gtid == KMP_GTID_UNKNOWN ) { \
635  gtid = __kmp_entry_gtid(); \
636  } // check and get gtid when needed
637 
638 // Beginning of a definition (provides name, parameters, debug trace)
639 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
640 // OP_ID - operation identifier (add, sub, mul, ...)
641 // TYPE - operands' type
// RET_TYPE - return type of the generated routine
// The expansion emits the header of __kmpc_atomic_<TYPE_ID>_<OP_ID>, the
// opening brace, a debug assertion on __kmp_init_serial and a KA_TRACE call.
// It does NOT close the function: the macro that uses ATOMIC_BEGIN supplies the
// rest of the body and the closing brace.
642 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
643 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
644 { \
645  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
646  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
647 
648 // ------------------------------------------------------------------------
649 // Lock variables used for critical sections for various size operands
// The suffix after ATOMIC_LOCK is the LCK_ID argument that OP_CRITICAL and
// MIN_MAX_CRITSECT paste on with ATOMIC_LOCK##LCK_ID.
650 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
651 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // 1-byte fixed
652 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // 2-byte fixed
653 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // 4-byte fixed
654 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
655 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // 8-byte fixed
656 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
657 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
658 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
659 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
660 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
661 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
662 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
663 
664 // ------------------------------------------------------------------------
665 // Operation on *lhs, rhs bound by critical section
666 // OP - operator (it's supposed to contain an assignment)
667 // LCK_ID - lock identifier (pasted onto ATOMIC_LOCK to select the lock)
668 // Note: gtid is not checked here as it should always be valid:
669 // 1, 2-byte - expect valid parameter, other - run KMP_CHECK_GTID before this macro
// The expansion is: acquire the lock, execute "(*lhs) OP (rhs);", release the
// lock. It uses the names lhs, rhs and gtid from the enclosing routine.
670 #define OP_CRITICAL(OP,LCK_ID) \
671  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
672  \
673  (*lhs) OP (rhs); \
674  \
675  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
676 
677 // ------------------------------------------------------------------------
678 // For GNU compatibility, we may need to use a critical section,
679 // even though it is not required by the ISA.
680 //
681 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
682 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
683 // critical section. On Intel(R) 64, all atomic operations are done with fetch
684 // and add or compare and exchange. Therefore, the FLAG parameter to this
685 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which
686 // require a critical section, where we predict that they will be implemented
687 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
688 //
689 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
690 // the FLAG parameter should always be 1. If we know that we will be using
691 // a critical section, then we want to make certain that we use the generic
692 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
693 // locks that are specialized based upon the size or type of the data.
694 //
695 // If FLAG is 0, then we are relying on dead code elimination by the build
696 // compiler to get rid of the useless block of code, and save a needless
697 // branch at runtime.
698 //
699 
// OP - operator text handed to OP_CRITICAL (must contain an assignment)
// FLAG - compile-time switch, see above
// When KMP_GOMP_COMPAT is defined and both FLAG and (__kmp_atomic_mode == 2)
// hold, the update is performed under lock id 0 (__kmp_atomic_lock) and the
// routine returns immediately. The expansion ends in a bare "return;", so it
// can only be used inside routines that return void. Without KMP_GOMP_COMPAT
// the macro expands to nothing.
700 #ifdef KMP_GOMP_COMPAT
701 #define OP_GOMP_CRITICAL(OP,FLAG) \
702  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
703  KMP_CHECK_GTID; \
704  OP_CRITICAL( OP, 0 ); \
705  return; \
706  }
707 #else
708 #define OP_GOMP_CRITICAL(OP,FLAG)
709 #endif /* KMP_GOMP_COMPAT */
710 
// Back-off and compare-and-store primitives used by OP_CMPXCHG and
// OP_CMPXCHG_WORKAROUND.
// With KMP_MIC: KMP_DO_PAUSE is _mm_delay_32( 30 ) and the 32/64-bit
// compare-and-store is routed through two inline wrappers around
// __sync_bool_compare_and_swap( p, cv, sv ).
// Otherwise: KMP_DO_PAUSE is KMP_CPU_PAUSE() and the 32/64-bit forms alias
// KMP_COMPARE_AND_STORE_ACQ32/64.
// The 8- and 16-bit forms alias KMP_COMPARE_AND_STORE_ACQ8/16 in both cases.
711 #if KMP_MIC
712 
713  #define KMP_DO_PAUSE _mm_delay_32( 30 )
714 
715  inline kmp_int32 __kmp_ex_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ) {
716  return __sync_bool_compare_and_swap( p, cv, sv );
717  }
718  inline kmp_int32 __kmp_ex_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ) {
719  return __sync_bool_compare_and_swap( p, cv, sv );
720  }
721 
722  #define KMP_EX_COMPARE_AND_STORE_ACQ32 __kmp_ex_compare_and_store32
723  #define KMP_EX_COMPARE_AND_STORE_ACQ64 __kmp_ex_compare_and_store64
724 
725 #else // KMP_MIC
726 
727  #define KMP_DO_PAUSE KMP_CPU_PAUSE()
728 
729  #define KMP_EX_COMPARE_AND_STORE_ACQ32 KMP_COMPARE_AND_STORE_ACQ32
730  #define KMP_EX_COMPARE_AND_STORE_ACQ64 KMP_COMPARE_AND_STORE_ACQ64
731 
732 #endif // KMP_MIC
733 
734 #define KMP_EX_COMPARE_AND_STORE_ACQ8 KMP_COMPARE_AND_STORE_ACQ8
735 #define KMP_EX_COMPARE_AND_STORE_ACQ16 KMP_COMPARE_AND_STORE_ACQ16
736 
737 // ------------------------------------------------------------------------
738 // Operation on *lhs, rhs using "compare_and_store" routine
739 // TYPE - operands' type
740 // BITS - size in bits, used to distinguish low level calls
741 // OP - operator
742 // Note: temp_val introduced in order to force the compiler to read
743 // *lhs only once (w/o it the compiler reads *lhs twice)
// Retry loop: snapshot *lhs into old_value, compute new_value = old_value OP rhs,
// then try to store new_value if *lhs still holds old_value. Both values are
// handed to the compare-and-store call reinterpreted as kmp_int<BITS>. On
// failure the loop runs KMP_DO_PAUSE, re-reads *lhs and recomputes.
744 #define OP_CMPXCHG(TYPE,BITS,OP) \
745  { \
746  TYPE KMP_ATOMIC_VOLATILE temp_val; \
747  TYPE old_value, new_value; \
748  temp_val = *lhs; \
749  old_value = temp_val; \
750  new_value = old_value OP rhs; \
751  while ( ! KMP_EX_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
752  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
753  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
754  { \
755  KMP_DO_PAUSE; \
756  \
757  temp_val = *lhs; \
758  old_value = temp_val; \
759  new_value = old_value OP rhs; \
760  } \
761  }
762 
763 // 2007-06-25:
764 // workaround for C78287 (complex(kind=4) data type)
765 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
766 // Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
767 // This is a problem of the compiler.
768 // Related tracker is C76005, targeted to 11.0.
769 // I verified the asm of the workaround.
// Same retry loop as OP_CMPXCHG, but without the volatile temporary: each
// value lives in a struct _sss whose vvv member points at its own cmp member
// viewed as kmp_int<BITS>, and *lhs is read through a volatile kmp_int<BITS>
// pointer.
// The anonym array has size 1 when sizeof(TYPE) == sizeof(kmp_int<BITS>) and
// size 0 otherwise - presumably intended as a compile-time size check.
770 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
771  { \
772  char anonym[ ( sizeof( TYPE ) == sizeof( kmp_int##BITS ) ) ? ( 1 ) : ( 0 ) ] = { 1 }; \
773  struct _sss { \
774  TYPE cmp; \
775  kmp_int##BITS *vvv; \
776  }; \
777  struct _sss old_value, new_value; \
778  old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
779  new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
780  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
781  new_value.cmp = old_value.cmp OP rhs; \
782  while ( ! KMP_EX_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
783  *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
784  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
785  { \
786  KMP_DO_PAUSE; \
787  \
788  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
789  new_value.cmp = old_value.cmp OP rhs; \
790  } \
791  }
792 // end of the first part of the workaround for C78287
793 
794 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
795 
796 // ------------------------------------------------------------------------
797 // X86 or X86_64: no alignment problems ====================================
// Defines void __kmpc_atomic_<TYPE_ID>_<OP_ID> as a single
// KMP_TEST_THEN_ADD<BITS>( lhs, OP rhs ) call, preceded by the optional GOMP
// critical-section path. LCK_ID and MASK are accepted but not used by this
// x86/x86_64 variant.
798 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
799 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
800  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
801  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
802  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
803 }
804 // -------------------------------------------------------------------------
// Defines void __kmpc_atomic_<TYPE_ID>_<OP_ID> as a single
// __kmp_test_then_add_real<BITS>( lhs, OP rhs ) call, preceded by the optional
// GOMP critical-section path. LCK_ID and MASK are accepted but not used by
// this x86/x86_64 variant.
805 #define ATOMIC_FLOAT_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
806 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
807  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
808  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
809  __kmp_test_then_add_real##BITS( lhs, OP rhs ); \
810 }
811 // -------------------------------------------------------------------------
// Defines void __kmpc_atomic_<TYPE_ID>_<OP_ID> as the OP_CMPXCHG retry loop,
// preceded by the optional GOMP critical-section path (which uses "OP=").
// LCK_ID and MASK are accepted but not used by this x86/x86_64 variant.
812 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
813 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
814  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
815  OP_CMPXCHG(TYPE,BITS,OP) \
816 }
817 // -------------------------------------------------------------------------
818 // workaround for C78287 (complex(kind=4) data type)
// Same as ATOMIC_CMPXCHG, except that the update goes through
// OP_CMPXCHG_WORKAROUND instead of OP_CMPXCHG. LCK_ID and MASK are accepted
// but not used by this x86/x86_64 variant.
819 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
820 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
821  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
822  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
823 }
824 // end of the second part of the workaround for C78287
825 
826 #else
827 // -------------------------------------------------------------------------
828 // Code for other architectures that don't handle unaligned accesses.
// Variant for targets other than x86/x86_64. MASK is pasted after "0x" and
// and-ed with the address of lhs: a zero result (aligned) takes the
// KMP_TEST_THEN_ADD<BITS> path, anything else falls back to OP_CRITICAL with
// "OP=" under the lock selected by LCK_ID.
829 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
830 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
831  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
832  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
833  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
834  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
835  } else { \
836  KMP_CHECK_GTID; \
837  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
838  } \
839 }
840 // -------------------------------------------------------------------------
// Variant for targets other than x86/x86_64. Unlike the x86 variant it does
// not call __kmp_test_then_add_real<BITS>: an aligned lhs (address & 0x<MASK>
// is zero) uses the OP_CMPXCHG retry loop, an unaligned one uses OP_CRITICAL
// with "OP=" under the lock selected by LCK_ID.
841 #define ATOMIC_FLOAT_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
842 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
843  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
844  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
845  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
846  } else { \
847  KMP_CHECK_GTID; \
848  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
849  } \
850 }
851 // -------------------------------------------------------------------------
// Variant for targets other than x86/x86_64: an aligned lhs (address &
// 0x<MASK> is zero) uses the OP_CMPXCHG retry loop, an unaligned one resolves
// gtid and uses OP_CRITICAL with "OP=" under the lock selected by LCK_ID.
852 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
853 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
854  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
855  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
856  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
857  } else { \
858  KMP_CHECK_GTID; \
859  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
860  } \
861 }
862 // -------------------------------------------------------------------------
863 // workaround for C78287 (complex(kind=4) data type)
// Variant for targets other than x86/x86_64. Note that the aligned path here
// expands the plain OP_CMPXCHG loop, not OP_CMPXCHG_WORKAROUND, so on these
// targets the body is the same as ATOMIC_CMPXCHG.
864 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
865 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
866  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
867  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
868  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
869  } else { \
870  KMP_CHECK_GTID; \
871  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
872  } \
873 }
874 // end of the second part of the workaround for C78287
875 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
876 
877 // Routines for ATOMIC 4-byte operands addition and subtraction
// Argument order: TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG.
// With KMP_MIC the float add/sub routines are generated by ATOMIC_CMPXCHG
// instead of ATOMIC_FLOAT_ADD.
878 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
879 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
880 
881 #if KMP_MIC
882 ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
883 ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
884 #else
885 ATOMIC_FLOAT_ADD( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
886 ATOMIC_FLOAT_ADD( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
887 #endif // KMP_MIC
888 
889 // Routines for ATOMIC 8-byte operands addition and subtraction
890 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
891 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
892 
893 #if KMP_MIC
894 ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
895 ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
896 #else
897 ATOMIC_FLOAT_ADD( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
898 ATOMIC_FLOAT_ADD( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
899 #endif // KMP_MIC
900 
901 // ------------------------------------------------------------------------
902 // Entries definition for integer operands
903 // TYPE_ID - operands type and size (fixed4, float4)
904 // OP_ID - operation identifier (add, sub, mul, ...)
905 // TYPE - operand type
906 // BITS - size in bits, used to distinguish low level calls
907 // OP - operator (used in critical section)
908 // LCK_ID - lock identifier, used to possibly distinguish lock variable
909 // MASK - used for alignment check
910 
911 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
912 // ------------------------------------------------------------------------
913 // Routines for ATOMIC integer and floating-point operands, other operators
914 // ------------------------------------------------------------------------
915 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// Each line expands to one void __kmpc_atomic_<TYPE_ID>_<OP_ID> routine built
// by ATOMIC_CMPXCHG; the trailing comment gives the generated name.
916 ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
917 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
918 ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
919 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
920 ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
921 ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
922 ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
923 ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
924 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
925 ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
926 ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
927 ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
928 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
929 ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
930 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
931 ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
932 ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
933 ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
934 ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
935 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
936 ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
937 ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
938 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
939 ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
940 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
941 ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
942 ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
943 ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
944 ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
945 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
946 ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
947 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
948 ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
949 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
950 ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
951 ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
952 ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
953 ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
954 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
955 ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
956 ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
957 ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
958 ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
959 ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
960 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
961 
962 
963 /* ------------------------------------------------------------------------ */
964 /* Routines for C/C++ Reduction operators && and || */
965 /* ------------------------------------------------------------------------ */
966 
967 // ------------------------------------------------------------------------
968 // Need separate macros for &&, || because there is no combined assignment
969 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
// Critical-section-only form. The text "= *lhs OP" is passed as the operator,
// so OP_CRITICAL expands to "(*lhs) = *lhs OP (rhs);" under the lock selected
// by LCK_ID (or under the generic lock on the GOMP path).
970 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
971 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
972  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
973  OP_CRITICAL( = *lhs OP, LCK_ID ) \
974 }
975 
976 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
977 
978 // ------------------------------------------------------------------------
979 // X86 or X86_64: no alignment problems ===================================
// Logical-operator routine for x86/x86_64: the GOMP path performs
// "(*lhs) = *lhs OP (rhs);" under the generic lock, otherwise the OP_CMPXCHG
// retry loop computes old_value OP rhs. LCK_ID and MASK are not used here.
980 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
981 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
982  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
983  OP_CMPXCHG(TYPE,BITS,OP) \
984 }
985 
986 #else
987 // ------------------------------------------------------------------------
988 // Code for other architectures that don't handle unaligned accesses.
// Logical-operator routine for targets other than x86/x86_64: an aligned lhs
// (address & 0x<MASK> is zero) uses the OP_CMPXCHG retry loop; an unaligned
// one resolves gtid and performs "(*lhs) = *lhs OP (rhs);" under the lock
// selected by LCK_ID.
989 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
990 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
991  OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
992  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
993  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
994  } else { \
995  KMP_CHECK_GTID; \
996  OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
997  } \
998 }
999 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1000 
// Logical AND (&&) / OR (||) routines for 1-, 2-, 4- and 8-byte operands.
// Argument order: TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG.
1001 ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
1002 ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
1003 ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
1004 ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
1005 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
1006 ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
1007 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
1008 ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
1009 
1010 
1011 /* ------------------------------------------------------------------------- */
1012 /* Routines for Fortran operators that have no counterpart in C: */
1013 /* MAX, MIN, .EQV., .NEQV. */
1014 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1015 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1016 /* ------------------------------------------------------------------------- */
1017 
1018 // -------------------------------------------------------------------------
1019 // MIN and MAX need separate macros
1020 // OP - comparison that is true while *lhs still has to be replaced by rhs
// (the instantiations in this file pass "<" for max and ">" for min)
// LCK_ID - lock identifier (pasted onto ATOMIC_LOCK to select the lock)
// Under the lock, the comparison is evaluated and *lhs is overwritten with rhs
// only when it holds.
1021 #define MIN_MAX_CRITSECT(OP,LCK_ID) \
1022  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1023  \
1024  if ( *lhs OP rhs ) { /* still need actions? */ \
1025  *lhs = rhs; \
1026  } \
1027  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1028 
1029 // -------------------------------------------------------------------------
// GOMP-compatibility path for min/max, the counterpart of OP_GOMP_CRITICAL:
// when KMP_GOMP_COMPAT is defined and both FLAG and (__kmp_atomic_mode == 2)
// hold, gtid is resolved, the update runs under lock id 0 (__kmp_atomic_lock)
// and the routine returns. Without KMP_GOMP_COMPAT it expands to nothing.
1030 #ifdef KMP_GOMP_COMPAT
1031 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
1032  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
1033  KMP_CHECK_GTID; \
1034  MIN_MAX_CRITSECT( OP, 0 ); \
1035  return; \
1036  }
1037 #else
1038 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
1039 #endif /* KMP_GOMP_COMPAT */
1040 
1041 // -------------------------------------------------------------------------
// Lock-free min/max update. Loops while the snapshot of *lhs still satisfies
// "old_value OP rhs" and the compare-and-store of rhs over old_value fails;
// the loop ends either when the store succeeds or when the comparison no
// longer holds. Note that this macro calls KMP_COMPARE_AND_STORE_ACQ<BITS> and
// KMP_CPU_PAUSE() directly rather than the KMP_EX_* / KMP_DO_PAUSE forms used
// by OP_CMPXCHG.
1042 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1043  { \
1044  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1045  TYPE old_value; \
1046  temp_val = *lhs; \
1047  old_value = temp_val; \
1048  while ( old_value OP rhs && /* still need actions? */ \
1049  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1050  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1051  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
1052  { \
1053  KMP_CPU_PAUSE(); \
1054  temp_val = *lhs; \
1055  old_value = temp_val; \
1056  } \
1057  }
1058 
1059 // -------------------------------------------------------------------------
1060 // Critical-section-only min/max (instantiated in this file for float16 operands)
// The comparison is first evaluated without the lock; only when it holds does
// the routine take the lock, where MIN_MAX_CRITSECT evaluates it again before
// storing rhs.
1061 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1062 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1063  if ( *lhs OP rhs ) { /* need actions? */ \
1064  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1065  MIN_MAX_CRITSECT(OP,LCK_ID) \
1066  } \
1067 }
1068 
1069 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1070 
1071 // -------------------------------------------------------------------------
1072 // X86 or X86_64: no alignment problems ====================================
// Min/max routine for x86/x86_64: if "*lhs OP rhs" holds, either take the GOMP
// critical-section path or run the MIN_MAX_CMPXCHG loop. LCK_ID and MASK are
// accepted but not used by this variant.
1073 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1074 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1075  if ( *lhs OP rhs ) { \
1076  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1077  MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1078  } \
1079 }
1080 
1081 #else
1082 // -------------------------------------------------------------------------
1083 // Code for other architectures that don't handle unaligned accesses.
// Min/max routine for targets other than x86/x86_64: after the unlocked
// "*lhs OP rhs" pre-check and the optional GOMP path, an aligned lhs (address
// & 0x<MASK> is zero) uses MIN_MAX_CMPXCHG, an unaligned one resolves gtid and
// uses MIN_MAX_CRITSECT under the lock selected by LCK_ID.
1084 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1085 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1086  if ( *lhs OP rhs ) { \
1087  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1088  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1089  MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1090  } else { \
1091  KMP_CHECK_GTID; \
1092  MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
1093  } \
1094  } \
1095 }
1096 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1097 
// MAX / MIN routines. OP is the "update needed" test: "<" for max (store rhs
// when *lhs < rhs) and ">" for min. Fixed and float4/float8 operands use the
// compare-and-store form; float16 operands use the critical-section form with
// GOMP_FLAG 1.
1098 MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1099 MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1100 MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1101 MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1102 MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
1103 MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
1104 MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1105 MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1106 MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1107 MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1108 MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1109 MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1110 MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
1111 MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
1112 #if ( KMP_ARCH_X86 )
1113  MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
1114  MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
1115 #endif
1116 // ------------------------------------------------------------------------
1117 // Need separate macros for .EQV. because of the need of complement (~)
1118 // OP ignored for critical sections, ^=~ used instead
// Critical-section-only form: both paths expand OP_CRITICAL with "^=~", i.e.
// "(*lhs) ^=~ (rhs);", under the generic lock (GOMP path) or the lock selected
// by LCK_ID.
1119 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1120 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1121  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1122  OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
1123 }
1124 
1125 // ------------------------------------------------------------------------
// ATOMIC_CMPX_EQV: .EQV. entry point for fixed-size integers.
//   TYPE_ID, OP_ID, TYPE - forwarded to ATOMIC_BEGIN (return type void)
//   BITS, OP             - forwarded, together with TYPE, to OP_CMPXCHG
//   LCK_ID               - lock id for the OP_CRITICAL fallback (second variant only)
//   MASK                 - hex digits pasted after "0x" to build the mask tested
//                          against the lhs address (second variant only)
//   GOMP_FLAG            - forwarded to OP_GOMP_CRITICAL, always with "^=~"
// The x86/x86_64 variant ignores LCK_ID and MASK and always uses OP_CMPXCHG.
1126 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1127 // ------------------------------------------------------------------------
1128 // X86 or X86_64: no alignment problems ===================================
1129 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1130 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1131  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1132  OP_CMPXCHG(TYPE,BITS,OP) \
1133 }
1134 // ------------------------------------------------------------------------
1135 #else
1136 // ------------------------------------------------------------------------
1137 // Code for other architectures that don't handle unaligned accesses.
// OP_CMPXCHG is used only when (lhs & 0x<MASK>) is zero; otherwise the update
// goes through OP_CRITICAL with "^=~" under lock LCK_ID.
1138 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1139 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1140  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
1141  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1142  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1143  } else { \
1144  KMP_CHECK_GTID; \
1145  OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
1146  } \
1147 }
1148 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1149 
// .NEQV. / .EQV. entry points for 1-, 2-, 4- and 8-byte integers.
// neqv entries pass '^' to ATOMIC_CMPXCHG; eqv entries pass '^~' to
// ATOMIC_CMPX_EQV.  Argument order for the eqv lines:
//   TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
// NOTE(review): ATOMIC_CMPXCHG is defined earlier in the file; its argument
// order appears to match -- confirm there.
1150 ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1151 ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1152 ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1153 ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1154 ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1155 ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1156 ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1157 ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
1158 
1159 
1160 // ------------------------------------------------------------------------
1161 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1162 // TYPE_ID, OP_ID, TYPE - detailed above
1163 // OP - operator
1164 // LCK_ID - lock identifier, used to possibly distinguish lock variable
// GOMP_FLAG - forwarded to OP_GOMP_CRITICAL
// The operator is pasted with '=' (OP##=) before being handed to
// OP_GOMP_CRITICAL and OP_CRITICAL, so both receive a compound assignment.
1165 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1166 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1167  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1168  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1169 }
1170 
1171 /* ------------------------------------------------------------------------- */
// add/sub/mul/div entry points for the extended types.
// ATOMIC_CRITICAL argument order: TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG.
// The _a16 variants are compiled only when KMP_ARCH_X86 is set.
1172 // routines for long double type
1173 ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
1174 ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
1175 ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
1176 ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
1177 // routines for _Quad type
1178 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
1179 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
1180 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
1181 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
1182 #if ( KMP_ARCH_X86 )
1183  ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
1184  ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
1185  ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
1186  ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
1187 #endif
1188 // routines for complex types
1189 
1190 // workaround for C78287 (complex(kind=4) data type)
// NOTE(review): ATOMIC_CMPXCHG_WORKAROUND is defined earlier in the file; the
// cmplx4 entries use it instead of ATOMIC_CRITICAL -- confirm its arguments there.
1191 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
1192 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
1193 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
1194 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
1195 // end of the workaround for C78287
1196 
1197 ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
1198 ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
1199 ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
1200 ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
1201 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
1202 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
1203 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
1204 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
1205 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
1206 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
1207 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
1208 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
1209 #if ( KMP_ARCH_X86 )
1210  ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
1211  ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
1212  ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
1213  ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
1214 #endif
1215 
1216 #if OMP_40_ENABLED
1217 
1218 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1219 // Supported only on IA-32 architecture and Intel(R) 64
1220 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1221 
1222 // ------------------------------------------------------------------------
1223 // Reversed-operand update "*lhs = rhs OP *lhs", bound by critical section
1224 // OP - plain binary operator (no assignment; it is placed between rhs and *lhs)
1225 // LCK_ID - lock identifier (pasted onto ATOMIC_LOCK to select the lock)
1226 // Note: don't check gtid as it should always be valid
1227 // 1, 2-byte - expect valid parameter, other - check before this macro
// Expands to three statements and relies on lhs, rhs and gtid being in scope
// at the point of use.
1228 #define OP_CRITICAL_REV(OP,LCK_ID) \
1229  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1230  \
1231  (*lhs) = (rhs) OP (*lhs); \
1232  \
1233  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1234 
// OP_GOMP_CRITICAL_REV: optional GOMP-compatibility path for reversed updates.
// With KMP_GOMP_COMPAT defined, and when FLAG is non-zero and
// __kmp_atomic_mode == 2, the update is done by OP_CRITICAL_REV with lock id 0
// (ATOMIC_LOCK0) and the enclosing function returns immediately.
// Without KMP_GOMP_COMPAT the macro expands to nothing.
1235 #ifdef KMP_GOMP_COMPAT
1236 #define OP_GOMP_CRITICAL_REV(OP,FLAG) \
1237  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1238  KMP_CHECK_GTID; \
1239  OP_CRITICAL_REV( OP, 0 ); \
1240  return; \
1241  }
1242 #else
1243 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
1244 #endif /* KMP_GOMP_COMPAT */
1245 
1246 
1247 // Beginning of a definition (provides name, parameters, debug trace)
1248 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1249 // OP_ID - operation identifier (add, sub, mul, ...)
1250 // TYPE - operands' type
// RET_TYPE - return type of the generated function
// Generates the header of __kmpc_atomic_<TYPE_ID>_<OP_ID>_rev( id_ref, gtid,
// lhs, rhs ) and leaves the body open; the using macro supplies the closing '}'.
1251 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1252 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1253 { \
1254  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1255  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1256 
1257 // ------------------------------------------------------------------------
1258 // Reversed-operand update of *lhs using "compare_and_store" routine
1259 // TYPE - operands' type
1260 // BITS - size in bits, used to distinguish low level calls
1261 // OP - operator
1262 // Note: temp_val introduced in order to force the compiler to read
1263 // *lhs only once (w/o it the compiler reads *lhs twice)
// Each attempt snapshots *lhs into old_value and computes
// new_value = rhs OP old_value (rhs is the LEFT operand).  The loop repeats,
// with KMP_DO_PAUSE between attempts, for as long as
// KMP_EX_COMPARE_AND_STORE_ACQ<BITS> returns zero.
1264 #define OP_CMPXCHG_REV(TYPE,BITS,OP) \
1265  { \
1266  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1267  TYPE old_value, new_value; \
1268  temp_val = *lhs; \
1269  old_value = temp_val; \
1270  new_value = rhs OP old_value; \
1271  while ( ! KMP_EX_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1272  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1273  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1274  { \
1275  KMP_DO_PAUSE; \
1276  \
1277  temp_val = *lhs; \
1278  old_value = temp_val; \
1279  new_value = rhs OP old_value; \
1280  } \
1281  }
1282 
1283 // -------------------------------------------------------------------------
// ATOMIC_CMPXCHG_REV: complete "_rev" entry point built from ATOMIC_BEGIN_REV,
// the optional GOMP critical path, and the OP_CMPXCHG_REV retry loop.
// LCK_ID is accepted but not referenced in the body.
1284 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
1285 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1286  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1287  OP_CMPXCHG_REV(TYPE,BITS,OP) \
1288 }
1289 
1290 // ------------------------------------------------------------------------
1291 // Entries definition for integer operands
1292 // TYPE_ID - operands type and size (fixed4, float4)
1293 // OP_ID - operation identifier (add, sub, mul, ...)
1294 // TYPE - operand type
1295 // BITS - size in bits, used to distinguish low level calls
1296 // OP - operator (used in critical section)
1297 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1298 
1299 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1300 // ------------------------------------------------------------------------
1301 // Routines for ATOMIC integer operands, other operators
1302 // ------------------------------------------------------------------------
// Only non-commutative operators are instantiated (div, shl, shr, sub); the
// list ends with the float4 / float8 div and sub entries.
1303 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1304 ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
1305 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
1306 ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
1307 ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
1308 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
1309 ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
1310 
1311 ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
1312 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
1313 ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
1314 ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
1315 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
1316 ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
1317 
1318 ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
1319 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
1320 ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
1321 ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
1322 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
1323 ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
1324 
1325 ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
1326 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
1327 ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
1328 ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
1329 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
1330 ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
1331 
1332 ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
1333 ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
1334 
1335 ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
1336 ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
1337 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1338 
1339 // ------------------------------------------------------------------------
1340 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1341 // TYPE_ID, OP_ID, TYPE - detailed above
1342 // OP - operator
1343 // LCK_ID - lock identifier, used to possibly distinguish lock variable
// GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_REV
// Unlike ATOMIC_CRITICAL, OP is passed through as a bare operator (no "="
// pasted on), because OP_CRITICAL_REV writes "*lhs = rhs OP *lhs" itself.
1344 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1345 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1346  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1347  OP_CRITICAL_REV(OP,LCK_ID) \
1348 }
1349 
1350 /* ------------------------------------------------------------------------- */
// Reversed sub/div entry points for the extended types.
// ATOMIC_CRITICAL_REV argument order: TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG.
// The _a16 variants are compiled only when KMP_ARCH_X86 is set.
1351 // routines for long double type
1352 ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
1353 ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
1354 // routines for _Quad type
1355 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
1356 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
1357 #if ( KMP_ARCH_X86 )
1358  ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
1359  ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
1360 #endif
1361 
1362 // routines for complex types
1363 ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
1364 ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
1365 ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
1366 ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
1367 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
1368 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
1369 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
1370 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
1371 #if ( KMP_ARCH_X86 )
1372  ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
1373  ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
1374 #endif
1375 
1376 
1377 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1378 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1379 
1380 #endif //OMP_40_ENABLED
1381 
1382 
1383 /* ------------------------------------------------------------------------ */
1384 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1385 /* Note: in order to reduce the total number of types combinations */
1386 /* it is supposed that compiler converts RHS to longest floating type,*/
1387 /* that is _Quad, before call to any of these routines */
1388 /* Conversion to _Quad will be done by the compiler during calculation, */
1389 /* conversion back to TYPE - before the assignment, like: */
1390 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1391 /* Performance penalty expected because of SW emulation use */
1392 /* ------------------------------------------------------------------------ */
1393 
// ATOMIC_BEGIN_MIX: header of a mixed-type entry point
//   void __kmpc_atomic_<TYPE_ID>_<OP_ID>_<RTYPE_ID>( id_ref, gtid, TYPE *lhs, RTYPE rhs )
//   TYPE_ID, TYPE   - name fragment and C type of the left-hand side
//   OP_ID           - operation name fragment
//   RTYPE_ID, RTYPE - name fragment and C type of the right-hand side
// The body is left open; the using macro supplies the closing '}'.
1394 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1395 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1396 { \
1397  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1398  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1399 
1400 // -------------------------------------------------------------------------
// ATOMIC_CRITICAL_FP: mixed-type entry point that always updates under a lock.
// OP is pasted with '=' (OP##=) and handed first to OP_GOMP_CRITICAL (guarded
// by GOMP_FLAG) and then to OP_CRITICAL with lock LCK_ID.
1401 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1402 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1403  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1404  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1405 }
1406 
1407 // -------------------------------------------------------------------------
// ATOMIC_CMPXCHG_MIX: mixed-type entry point based on OP_CMPXCHG.
//   TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE - forwarded to ATOMIC_BEGIN_MIX
//   BITS, OP  - forwarded, together with TYPE, to OP_CMPXCHG
//   LCK_ID    - lock id for the OP_CRITICAL fallback (second variant only)
//   MASK      - hex digits pasted after "0x" to build the mask tested against
//               the lhs address (second variant only)
//   GOMP_FLAG - forwarded to OP_GOMP_CRITICAL
// The x86/x86_64 variant ignores LCK_ID and MASK.
1408 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1409 // -------------------------------------------------------------------------
1410 // X86 or X86_64: no alignment problems ====================================
1411 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1412 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1413  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1414  OP_CMPXCHG(TYPE,BITS,OP) \
1415 }
1416 // -------------------------------------------------------------------------
1417 #else
1418 // ------------------------------------------------------------------------
1419 // Code for other architectures that don't handle unaligned accesses.
// OP_CMPXCHG is used only when (lhs & 0x<MASK>) is zero; otherwise the update
// goes through OP_CRITICAL under lock LCK_ID.
1420 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1421 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1422  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1423  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1424  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1425  } else { \
1426  KMP_CHECK_GTID; \
1427  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1428  } \
1429 }
1430 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1431 
// Mixed-type entry points.  Argument order:
//   ATOMIC_CMPXCHG_MIX: TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, LCK_ID, MASK, GOMP_FLAG
//   ATOMIC_CRITICAL_FP: TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, GOMP_FLAG
1432 // RHS=float8
1433 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1434 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1435 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1436 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1437 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
1438 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
1439 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1440 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1441 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1442 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1443 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1444 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1445 
1446 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
1447 
1448 ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1449 ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1450 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1451 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1452 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1453 
1454 ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1455 ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1456 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1457 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1458 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1459 
1460 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
1461 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
1462 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
1463 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
1464 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
1465 
1466 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1467 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1468 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1469 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1470 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1471 
1472 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1473 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1474 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1475 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1476 
1477 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1478 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1479 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1480 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1481 
1482 ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
1483 ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
1484 ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
1485 ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
1486 
// ATOMIC_CMPXCHG_CMPLX: mixed-type entry point for complex operands.
// Parameters are the same as for ATOMIC_CMPXCHG_MIX.  The x86/x86_64 variant
// uses OP_CMPXCHG_WORKAROUND and ignores LCK_ID and MASK; the other variant
// uses plain OP_CMPXCHG when (lhs & 0x<MASK>) is zero and OP_CRITICAL under
// lock LCK_ID otherwise.
1487 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1488 // ------------------------------------------------------------------------
1489 // X86 or X86_64: no alignment problems ====================================
1490 // workaround for C78287 (complex(kind=4) data type)
1491 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1492 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1493  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1494  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
1495 }
1496 // end of the second part of the workaround for C78287
1497 #else
1498 // ------------------------------------------------------------------------
1499 // Code for other architectures that don't handle unaligned accesses.
1500 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1501 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1502  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1503  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1504  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1505  } else { \
1506  KMP_CHECK_GTID; \
1507  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1508  } \
1509 }
1510 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1511 
// cmplx4 (kmp_cmplx32) left-hand side with cmplx8 (kmp_cmplx64) right-hand side.
// Argument order: TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, LCK_ID, MASK, GOMP_FLAG
1512 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1513 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1514 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1515 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
1516 
1517 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1518 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1519 
1521 // ------------------------------------------------------------------------
1522 // Atomic READ routines
1523 // ------------------------------------------------------------------------
1524 
1525 // ------------------------------------------------------------------------
1526 // Beginning of a definition (provides name, parameters, debug trace)
1527 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1528 // OP_ID - operation identifier (add, sub, mul, ...)
1529 // TYPE - operands' type
// RET_TYPE - return type of the generated function
// Generates the header of __kmpc_atomic_<TYPE_ID>_<OP_ID>( id_ref, gtid, loc );
// there is no rhs parameter.  The body is left open for the using macro to close.
1530 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1531 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1532 { \
1533  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1534  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1535 
1536 // ------------------------------------------------------------------------
1537 // Atomic read of *loc using "compare_and_store_ret" routine
1538 // TYPE - operands' type
1539 // BITS - size in bits, used to distinguish low level calls
1540 // OP - operator (not referenced in the body)
1541 // Note: temp_val introduced in order to force the compiler to read
1542 // *loc only once (w/o it the compiler reads *loc twice)
1543 // TODO: check if it is still necessary
1544 // Return old value regardless of the result of "compare & swap" operation
// The same snapshot is passed as both the compare value and the store value.
// The union reinterprets TYPE as kmp_int<BITS> for the call and back again for
// the result.  new_value is assigned but not declared here: the using macro
// must declare it.  The expansion ends with "return new_value;".
1545 
1546 #define OP_CMPXCHG_READ(TYPE,BITS,OP) \
1547  { \
1548  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1549  union f_i_union { \
1550  TYPE f_val; \
1551  kmp_int##BITS i_val; \
1552  }; \
1553  union f_i_union old_value; \
1554  temp_val = *loc; \
1555  old_value.f_val = temp_val; \
1556  old_value.i_val = __kmp_compare_and_store_ret##BITS( (kmp_int##BITS *) loc, \
1557  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
1558  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1559  new_value = old_value.f_val; \
1560  return new_value; \
1561  }
1562 
1563 // -------------------------------------------------------------------------
1564 // Read of *loc into new_value, bound by critical section
1565 // OP - not referenced in the body
1566 // LCK_ID - lock identifier (pasted onto ATOMIC_LOCK to select the lock)
1567 // Note: don't check gtid as it should always be valid
1568 // 1, 2-byte - expect valid parameter, other - check before this macro
// Relies on loc, gtid and a declared new_value being in scope at the point of use.
1569 #define OP_CRITICAL_READ(OP,LCK_ID) \
1570  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1571  \
1572  new_value = (*loc); \
1573  \
1574  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1575 
1576 // -------------------------------------------------------------------------
// OP_GOMP_CRITICAL_READ: optional GOMP-compatibility path for reads.
// With KMP_GOMP_COMPAT defined, and when FLAG is non-zero and
// __kmp_atomic_mode == 2, *loc is read by OP_CRITICAL_READ with lock id 0
// (ATOMIC_LOCK0) and the enclosing function returns new_value immediately.
// Without KMP_GOMP_COMPAT the macro expands to nothing.
1577 #ifdef KMP_GOMP_COMPAT
1578 #define OP_GOMP_CRITICAL_READ(OP,FLAG) \
1579  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1580  KMP_CHECK_GTID; \
1581  OP_CRITICAL_READ( OP, 0 ); \
1582  return new_value; \
1583  }
1584 #else
1585 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
1586 #endif /* KMP_GOMP_COMPAT */
1587 
1588 // -------------------------------------------------------------------------
// ATOMIC_FIXED_READ: read entry point for fixed-size integers.
// After the optional GOMP path, the value comes from
// KMP_TEST_THEN_ADD<BITS>( loc, OP 0 ), i.e. an add of "OP 0".
// NOTE(review): presumably KMP_TEST_THEN_ADD<BITS> returns the value held
// before the addition -- confirm against its definition.
1589 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1590 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1591  TYPE new_value; \
1592  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1593  new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
1594  return new_value; \
1595 }
1596 // -------------------------------------------------------------------------
// ATOMIC_CMPXCHG_READ: read entry point based on OP_CMPXCHG_READ.
// Declares the new_value local that OP_GOMP_CRITICAL_READ and OP_CMPXCHG_READ
// assign; the return statement comes from inside OP_CMPXCHG_READ.
1597 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1598 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1599  TYPE new_value; \
1600  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1601  OP_CMPXCHG_READ(TYPE,BITS,OP) \
1602 }
1603 // ------------------------------------------------------------------------
1604 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1605 // TYPE_ID, OP_ID, TYPE - detailed above
1606 // OP - operator
1607 // LCK_ID - lock identifier, used to possibly distinguish lock variable
// GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_READ
// The generated function returns TYPE by value: the copy of *loc taken by
// OP_CRITICAL_READ under lock LCK_ID.
1608 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1609 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1610  TYPE new_value; \
1611  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
1612  OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
1613  return new_value; \
1614 }
1615 
1616 // ------------------------------------------------------------------------
1617 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
1618 // Let's return the read value through the additional parameter.
1619 
1620 #if ( KMP_OS_WINDOWS )
1621 
// OP_CRITICAL_READ_WRK: copies *loc into *out while holding
// ATOMIC_LOCK<LCK_ID>.  OP is not referenced in the body.  Relies on out, loc
// and gtid being in scope at the point of use.
1622 #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
1623  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1624  \
1625  (*out) = (*loc); \
1626  \
1627  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1628 // ------------------------------------------------------------------------
// OP_GOMP_CRITICAL_READ_WRK: optional GOMP-compatibility path for the
// out-parameter read.  With KMP_GOMP_COMPAT defined, and when FLAG is non-zero
// and __kmp_atomic_mode == 2, *loc is copied to *out by OP_CRITICAL_READ_WRK
// with lock id 0 (ATOMIC_LOCK0) and the enclosing void function returns.
// The return matches OP_GOMP_CRITICAL_READ / OP_GOMP_CRITICAL_REV; without it
// control fell through and *out was overwritten by a second read taken under
// the per-type lock instead of lock 0.
// Without KMP_GOMP_COMPAT the macro expands to nothing.
1629 #ifdef KMP_GOMP_COMPAT
1630 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
1631  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1632  KMP_CHECK_GTID; \
1633  OP_CRITICAL_READ_WRK( OP, 0 ); \
 return; /* *out already holds the value read under lock 0 */ \
1634  }
1635 #else
1636 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1637 #endif /* KMP_GOMP_COMPAT */
1638 // ------------------------------------------------------------------------
// ATOMIC_BEGIN_READ_WRK: header of the out-parameter read entry point
//   void __kmpc_atomic_<TYPE_ID>_<OP_ID>( TYPE *out, id_ref, gtid, TYPE *loc )
// Note that 'out' is the FIRST parameter, ahead of id_ref and gtid.
// The body is left open; the using macro supplies the closing '}'.
1639 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1640 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1641 { \
1642  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1643  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1644 
1645 // ------------------------------------------------------------------------
// ATOMIC_CRITICAL_READ_WRK: complete out-parameter read entry point built from
// ATOMIC_BEGIN_READ_WRK, the optional GOMP path, and OP_CRITICAL_READ_WRK
// under lock LCK_ID.  The generated function returns void.
1646 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1647 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1648  OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
1649  OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
1650 }
1651 
1652 #endif // KMP_OS_WINDOWS
1653 
1654 // ------------------------------------------------------------------------
// Read entry points.  Argument order:
//   ATOMIC_FIXED_READ / ATOMIC_CMPXCHG_READ: TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
//   ATOMIC_CRITICAL_READ(_WRK):              TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1655 // TYPE_ID,OP_ID, TYPE, BITS, OP, GOMP_FLAG
1656 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
1657 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
1658 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
1659 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
1660 
1661 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
1662 ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
1663 ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
1664 
1665 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
1666 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
1667 
1668 // Fix for CQ220361 on Windows* OS
// On Windows the cmplx4 read has a different signature: it returns void and
// stores the value through a leading 'out' parameter.
1669 #if ( KMP_OS_WINDOWS )
1670  ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1671 #else
1672  ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1673 #endif
1674 ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
1675 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
1676 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
1677 #if ( KMP_ARCH_X86 )
1678  ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
1679  ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1680 #endif
1681 
1682 
1683 // ------------------------------------------------------------------------
1684 // Atomic WRITE routines
1685 // ------------------------------------------------------------------------
1686 
1687 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) /* write routine built on the __kmp_xchg_fixed<BITS> primitive */ \
1688 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1689  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1690  __kmp_xchg_fixed##BITS( lhs, rhs ); /* the exchange call performs the write; any value it returns is ignored */ \
1691 }
1692 // ------------------------------------------------------------------------
1693 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) /* write routine built on the __kmp_xchg_real<BITS> primitive */ \
1694 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1695  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1696  __kmp_xchg_real##BITS( lhs, rhs ); /* the exchange call performs the write; any value it returns is ignored */ \
1697 }
1698 
1699 
1700 // ------------------------------------------------------------------------
1701 // Operation on *lhs, rhs using "compare_and_store" routine
1702 // TYPE - operands' type
1703 // BITS - size in bits, used to distinguish low level calls
1704 // OP - operator
1705 // Note: temp_val introduced in order to force the compiler to read
1706 // *lhs only once (w/o it the compiler reads *lhs twice)
1707 #define OP_CMPXCHG_WR(TYPE,BITS,OP) /* OP is accepted but not used in the expansion */ \
1708  { \
1709  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1710  TYPE old_value, new_value; \
1711  temp_val = *lhs; \
1712  old_value = temp_val; /* expected current contents of *lhs */ \
1713  new_value = rhs; /* value to be written */ \
1714  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1715  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1716  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1717  { /* compare-and-store reported failure: pause, re-read *lhs and try again */ \
1718  KMP_CPU_PAUSE(); \
1719  \
1720  temp_val = *lhs; \
1721  old_value = temp_val; \
1722  new_value = rhs; \
1723  } \
1724  }
1725 
1726 // -------------------------------------------------------------------------
1727 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) /* write routine built on a compare-and-store retry loop */ \
1728 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1729  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1730  OP_CMPXCHG_WR(TYPE,BITS,OP) /* loops until rhs has been stored into *lhs */ \
1731 }
1732 
1733 // ------------------------------------------------------------------------
1734 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1735 // TYPE_ID, OP_ID, TYPE - detailed above
1736 // OP - operator
1737 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1738 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) /* write routine that uses a critical section; the instantiations in this file pass '=' as OP */ \
1739 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1740  OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
1741  OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
1742 }
1743 // -------------------------------------------------------------------------
1744 
1745 ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
1746 ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
1747 ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
1748 #if ( KMP_ARCH_X86 )
1749  ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1750 #else
1751  ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1752 #endif
1753 
1754 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
1755 #if ( KMP_ARCH_X86 )
1756  ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1757 #else
1758  ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1759 #endif
1760 
1761 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
1762 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
1763 ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
1764 ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
1765 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
1766 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
1767 #if ( KMP_ARCH_X86 )
1768  ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1769  ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1770 #endif
1771 
1772 
1773 // ------------------------------------------------------------------------
1774 // Atomic CAPTURE routines
1775 // ------------------------------------------------------------------------
1776 
1777 // Beginning of a definition (provides name, parameters, debug trace)
1778 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1779 // OP_ID - operation identifier (add, sub, mul, ...)
1780 // TYPE - operands' type
1781 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) /* emits the header and opening brace only; the macro that uses this one supplies the closing brace */ \
1782 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) /* 'flag' is read by the body macros to choose which value is captured */ \
1783 { \
1784  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1785  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1786 
1787 // -------------------------------------------------------------------------
1788 // Operation on *lhs, rhs bound by critical section
1789 // OP - operator (it's supposed to contain an assignment)
1790 // LCK_ID - lock identifier
1791 // Note: don't check gtid as it should always be valid
1792 // 1, 2-byte - expect valid parameter, other - check before this macro
1793 #define OP_CRITICAL_CPT(OP,LCK_ID) /* expects 'new_value' to be declared by the enclosing routine; the expansion ends with a return */ \
1794  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1795  \
1796  if( flag ) { /* nonzero flag: capture the value after the update */ \
1797  (*lhs) OP rhs; \
1798  new_value = (*lhs); \
1799  } else { /* zero flag: capture the value before the update */ \
1800  new_value = (*lhs); \
1801  (*lhs) OP rhs; \
1802  } \
1803  \
1804  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1805  return new_value;
1806 
1807 // ------------------------------------------------------------------------
1808 #ifdef KMP_GOMP_COMPAT
1809 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) /* OP is passed WITHOUT '='; the '=' is appended below */ \
1810  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1811  KMP_CHECK_GTID; \
1812  OP_CRITICAL_CPT( OP##=, 0 ); /* uses lock 0; OP_CRITICAL_CPT ends with a return, so control does not leave this block normally */ \
1813  }
1814 #else
1815 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1816 #endif /* KMP_GOMP_COMPAT */
1817 
1818 // ------------------------------------------------------------------------
1819 // Operation on *lhs, rhs using "compare_and_store" routine
1820 // TYPE - operands' type
1821 // BITS - size in bits, used to distinguish low level calls
1822 // OP - operator
1823 // Note: temp_val introduced in order to force the compiler to read
1824 // *lhs only once (w/o it the compiler reads *lhs twice)
1825 #define OP_CMPXCHG_CPT(TYPE,BITS,OP) /* OP is a bare binary operator here (no '='); the expansion returns on every path */ \
1826  { \
1827  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1828  TYPE old_value, new_value; /* block-local; shadow any same-named locals of the enclosing routine */ \
1829  temp_val = *lhs; \
1830  old_value = temp_val; \
1831  new_value = old_value OP rhs; \
1832  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1833  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1834  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1835  { /* compare-and-store reported failure: pause, re-read *lhs and recompute */ \
1836  KMP_CPU_PAUSE(); \
1837  \
1838  temp_val = *lhs; \
1839  old_value = temp_val; \
1840  new_value = old_value OP rhs; \
1841  } \
1842  if( flag ) { /* nonzero flag: capture the value after the update */ \
1843  return new_value; \
1844  } else /* zero flag: capture the value before the update */ \
1845  return old_value; \
1846  }
1847 
1848 // -------------------------------------------------------------------------
1849 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) /* capture routine built on a compare-and-store retry loop */ \
1850 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1851  TYPE new_value; /* used by the GOMP critical path; OP_CMPXCHG_CPT declares its own locals */ \
1852  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1853  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1854 }
1855 
1856 // -------------------------------------------------------------------------
1857 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) /* add/sub capture routine built on KMP_TEST_THEN_ADD<BITS>; OP is '+' or '-' in the instantiations below */ \
1858 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1859  TYPE old_value, new_value; /* new_value is referenced only by the GOMP critical path */ \
1860  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1861  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1862  old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); /* the result is treated as the value before the add */ \
1863  if( flag ) { /* nonzero flag: report the updated value, recomputed locally */ \
1864  return old_value OP rhs; \
1865  } else \
1866  return old_value; \
1867 }
1868 // -------------------------------------------------------------------------
1869 #define ATOMIC_FLOAT_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) /* add/sub capture routine built on __kmp_test_then_add_real<BITS>; OP is '+' or '-' in the instantiations below */ \
1870 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1871  TYPE old_value, new_value; /* new_value is referenced only by the GOMP critical path */ \
1872  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1873  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1874  old_value = __kmp_test_then_add_real##BITS( lhs, OP rhs ); /* the result is treated as the value before the add */ \
1875  if( flag ) { /* nonzero flag: report the updated value, recomputed locally */ \
1876  return old_value OP rhs; \
1877  } else \
1878  return old_value; \
1879 }
1880 // -------------------------------------------------------------------------
1881 
1882 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
1883 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
1884 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
1885 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
1886 
1887 #if KMP_MIC
1888 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1889 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1890 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1891 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1892 #else
1893 ATOMIC_FLOAT_ADD_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1894 ATOMIC_FLOAT_ADD_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1895 ATOMIC_FLOAT_ADD_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1896 ATOMIC_FLOAT_ADD_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1897 #endif // KMP_MIC
1898 
1899 // ------------------------------------------------------------------------
1900 // Entries definition for integer operands
1901 // TYPE_ID - operands type and size (fixed4, float4)
1902 // OP_ID - operation identifier (add, sub, mul, ...)
1903 // TYPE - operand type
1904 // BITS - size in bits, used to distinguish low level calls
1905 // OP - operator (used in critical section)
1906 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
1907 // ------------------------------------------------------------------------
1908 // Routines for ATOMIC integer operands, other operators
1909 // ------------------------------------------------------------------------
1910 // TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
1911 ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
1912 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
1913 ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
1914 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
1915 ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
1916 ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
1917 ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
1918 ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
1919 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
1920 ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
1921 ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
1922 ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
1923 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
1924 ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
1925 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
1926 ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
1927 ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
1928 ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
1929 ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
1930 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
1931 ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
1932 ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
1933 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
1934 ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
1935 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
1936 ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
1937 ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
1938 ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
1939 ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
1940 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
1941 ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
1942 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
1943 ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
1944 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
1945 ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
1946 ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
1947 ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
1948 ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
1949 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
1950 ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
1951 ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
1952 ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
1953 ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
1954 ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
1955 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1956 
1957 // ------------------------------------------------------------------------
1958 // Routines for C/C++ Reduction operators && and ||
1959 // ------------------------------------------------------------------------
1960 
1961 // -------------------------------------------------------------------------
1962 // Operation on *lhs, rhs bound by critical section
1963 // OP - operator (it's supposed to contain an assignment)
1964 // LCK_ID - lock identifier
1965 // Note: don't check gtid as it should always be valid
1966 // 1, 2-byte - expect valid parameter, other - check before this macro
1967 #define OP_CRITICAL_L_CPT(OP,LCK_ID) /* OP has the form "= *lhs &&" or "= *lhs ||"; expects 'new_value' declared by the enclosing routine; does not return by itself */ \
1968  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1969  \
1970  new_value = (*lhs); /* value before the update (captured when flag is zero) */ \
1971  (*lhs) OP rhs; /* perform the logical update on the shared location itself */ \
1972  if( flag ) \
1973  new_value = (*lhs); /* nonzero flag: capture the updated value instead */ \
1974  \
1975  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1976 
1977 // ------------------------------------------------------------------------
1978 #ifdef KMP_GOMP_COMPAT
1979 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) /* OP arrives as "= *lhs &&" or "= *lhs ||" (built in ATOMIC_CMPX_L_CPT) */ \
1980  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1981  KMP_CHECK_GTID; \
1982  OP_CRITICAL_L_CPT( OP, 0 ); /* critical section on lock 0 */ \
1983  return new_value; /* OP_CRITICAL_L_CPT does not return by itself */ \
1984  }
1985 #else
1986 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
1987 #endif /* KMP_GOMP_COMPAT */
1988 
1989 // ------------------------------------------------------------------------
1990 // Need separate macros for &&, || because there is no combined assignment
1991 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) /* capture routine for the logical operators && and || */ \
1992 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1993  TYPE new_value; /* receives the captured value on the GOMP critical path */ \
1994  OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) /* passes the prefix "= *lhs OP" so the critical macro can form a full assignment */ \
1995  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1996 }
1997 
1998 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
1999 ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
2000 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
2001 ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
2002 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
2003 ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
2004 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
2005 ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
2006 
2007 
2008 // -------------------------------------------------------------------------
2009 // Routines for Fortran operators that have no direct counterpart in C:
2010 // MAX, MIN, .EQV., .NEQV.
2011 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2012 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2013 // -------------------------------------------------------------------------
2014 
2015 // -------------------------------------------------------------------------
2016 // MIN and MAX need separate macros
2017 // OP - operator to check if we need any actions?
2018 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2019  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2020  \
2021  if ( *lhs OP rhs ) { /* still need actions? */ \
2022  old_value = *lhs; \
2023  *lhs = rhs; \
2024  if ( flag ) \
2025  new_value = rhs; \
2026  else \
2027  new_value = old_value; \
2028  } \
2029  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2030  return new_value; \
2031 
2032 // -------------------------------------------------------------------------
2033 #ifdef KMP_GOMP_COMPAT
2034 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) /* GOMP-compat path for min/max capture */ \
2035  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
2036  KMP_CHECK_GTID; \
2037  MIN_MAX_CRITSECT_CPT( OP, 0 ); /* uses lock 0; MIN_MAX_CRITSECT_CPT ends with a return */ \
2038  }
2039 #else
2040 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
2041 #endif /* KMP_GOMP_COMPAT */
2042 
2043 // -------------------------------------------------------------------------
2044 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) /* min/max capture via compare-and-store; the expansion returns on every path */ \
2045  { \
2046  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2047  /* 'old_value' is not declared here: it must be declared by the enclosing routine */ \
2048  temp_val = *lhs; \
2049  old_value = temp_val; \
2050  while ( old_value OP rhs && /* still need actions? */ \
2051  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2052  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2053  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
2054  { \
2055  KMP_CPU_PAUSE(); \
2056  temp_val = *lhs; \
2057  old_value = temp_val; \
2058  } \
2059  if( flag && ( old_value OP rhs ) ) /* rhs is the updated value only if the store really happened; otherwise *lhs kept the value observed in old_value */ \
2060  return rhs; \
2061  else \
2062  return old_value; \
2063  }
2064 
2065 // -------------------------------------------------------------------------
2066 // Operands handled without compare-and-store (float16 in the instantiations below) - use critical section
2067 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) /* min/max capture routine that updates under a lock */ \
2068 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2069  TYPE new_value, old_value; /* both are used by MIN_MAX_CRITSECT_CPT */ \
2070  if ( *lhs OP rhs ) { /* need actions? (check made without the lock; repeated under the lock inside MIN_MAX_CRITSECT_CPT) */ \
2071  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2072  MIN_MAX_CRITSECT_CPT(OP,LCK_ID) /* ends with a return */ \
2073  } \
2074  return *lhs; /* no update needed: report the current value */ \
2075 }
2076 
2077 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) /* min/max capture routine that updates with compare-and-store */ \
2078 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2079  TYPE new_value, old_value; /* new_value is used only by the GOMP critical path; old_value by both paths */ \
2080  if ( *lhs OP rhs ) { /* need actions? (check made before any atomic operation) */ \
2081  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2082  MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) /* returns on every path */ \
2083  } \
2084  return *lhs; /* no update needed: report the current value */ \
2085 }
2086 
2087 
2088 MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2089 MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2090 MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2091 MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2092 MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
2093 MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
2094 MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2095 MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2096 MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2097 MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2098 MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2099 MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2100 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
2101 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
2102 #if ( KMP_ARCH_X86 )
2103  MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
2104  MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
2105 #endif
2106 
2107 // ------------------------------------------------------------------------
2108 #ifdef KMP_GOMP_COMPAT
2109 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) /* unlike OP_GOMP_CRITICAL_CPT, OP is forwarded as-is (no '=' appended) */ \
2110  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2111  KMP_CHECK_GTID; \
2112  OP_CRITICAL_CPT( OP, 0 ); /* uses lock 0; OP_CRITICAL_CPT ends with a return */ \
2113  }
2114 #else
2115 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2116 #endif /* KMP_GOMP_COMPAT */
2117 // ------------------------------------------------------------------------
2118 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) /* capture routine for .EQV.; instantiated below with OP = ^~ */ \
2119 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2120  TYPE new_value; /* used by the GOMP critical path */ \
2121  OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* critical path always applies (*lhs) ^= ~rhs, independent of the OP argument */ \
2122  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2123 }
2124 
2125 // ------------------------------------------------------------------------
2126 
2127 ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2128 ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2129 ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2130 ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2131 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2132 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2133 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2134 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
2135 
2136 // ------------------------------------------------------------------------
2137 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2138 // TYPE_ID, OP_ID, TYPE - detailed above
2139 // OP - operator
2140 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2141 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) /* capture routine that updates under a lock; OP is a bare operator such as + */ \
2142 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2143  TYPE new_value; /* set and returned by OP_CRITICAL_CPT */ \
2144  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* bare operator: the GOMP macro appends '=' itself */ \
2145  OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2146 }
2147 
2148 // ------------------------------------------------------------------------
2149 
2150 // Workaround for cmplx4. Regular routines with return value don't work
2151 // on Win_32e. Let's return captured values through the additional parameter.
2152 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) /* like OP_CRITICAL_CPT, but the captured value is written to *out and the routine returns void */ \
2153  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2154  \
2155  if( flag ) { /* nonzero flag: capture the value after the update */ \
2156  (*lhs) OP rhs; \
2157  (*out) = (*lhs); \
2158  } else { /* zero flag: capture the value before the update */ \
2159  (*out) = (*lhs); \
2160  (*lhs) OP rhs; \
2161  } \
2162  \
2163  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2164  return;
2165 // ------------------------------------------------------------------------
2166 
2167 #ifdef KMP_GOMP_COMPAT
2168 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) /* OP is passed WITHOUT '='; the '=' is appended below */ \
2169  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2170  KMP_CHECK_GTID; \
2171  OP_CRITICAL_CPT_WRK( OP##=, 0 ); /* uses lock 0; OP_CRITICAL_CPT_WRK ends with a return */ \
2172  }
2173 #else
2174 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2175 #endif /* KMP_GOMP_COMPAT */
2176 // ------------------------------------------------------------------------
2177 
2178 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) /* emits the header and opening brace only; the macro that uses this one supplies the closing brace */ \
2179 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) /* the captured value is delivered through 'out' */ \
2180 { \
2181  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2182  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2183 // ------------------------------------------------------------------------
2184 
2185 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) /* void capture routine (used for cmplx4 below); OP is a bare operator such as + */ \
2186 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2187  OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) /* bare operator: the GOMP macro appends '=' itself */ \
2188  OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) /* compound assignment under ATOMIC_LOCK<LCK_ID> */ \
2189 }
2190 // The end of workaround for cmplx4
2191 
2192 /* ------------------------------------------------------------------------- */
2193 // routines for long double type
2194 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
2195 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
2196 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
2197 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
2198 // routines for _Quad type
2199 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
2200 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
2201 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
2202 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
2203 #if ( KMP_ARCH_X86 )
2204  ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
2205  ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
2206  ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
2207  ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
2208 #endif
2209 
2210 // routines for complex types
2211 
2212 // cmplx4 routines to return void
2213 ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
2214 ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
2215 ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
2216 ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
2217 
2218 ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
2219 ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
2220 ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
2221 ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
2222 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
2223 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
2224 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
2225 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
2226 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
2227 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
2228 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
2229 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
2230 #if ( KMP_ARCH_X86 )
2231  ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
2232  ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
2233  ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
2234  ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
2235 #endif
2236 
2237 #if OMP_40_ENABLED
2238 
2239 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations.
2240 // Supported only on IA-32 architecture and Intel(R) 64
2241 
// -------------------------------------------------------------------------
// Reversed-operand capture, serialized by a critical section:
//     (*lhs) = (rhs) OP (*lhs)
//     OP     - binary operator placed between (rhs) and (*lhs)
//     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to pick the lock
// Relies on gtid, flag, lhs, rhs and new_value from the enclosing routine.
// A non-zero flag captures the value after the update, a zero flag captures
// the value before it.  The macro returns from the enclosing routine.
// Note: gtid is not checked here as it should always be valid
//     1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT_REV(OP,LCK_ID)                                    \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
                                                                          \
    if( ! flag ) {                                                        \
        new_value = (*lhs);            /* capture, then update */         \
        (*lhs) = (rhs) OP (*lhs);                                         \
    } else {                                                              \
        (*lhs) = (rhs) OP (*lhs);      /* update, then capture */         \
        new_value = (*lhs);                                               \
    }                                                                     \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
    return new_value;
2261 
// ------------------------------------------------------------------------
// GOMP compatibility hook for the reversed capture routines.
// Without KMP_GOMP_COMPAT it expands to nothing.  With it, when FLAG is
// non-zero and __kmp_atomic_mode == 2, the operation is performed through
// OP_CRITICAL_CPT_REV with lock id 0 (which returns from the routine).
#ifndef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
#else
#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)                                 \
    if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
        KMP_CHECK_GTID;                                                   \
        OP_CRITICAL_CPT_REV( OP, 0 );                                     \
    }
#endif /* KMP_GOMP_COMPAT */
2272 
// ------------------------------------------------------------------------
// Reversed-operand capture implemented with a "compare_and_store" retry loop
//     TYPE - operands' type
//     BITS - size in bits, selects the low level compare-and-store call
//     OP   - binary operator, applied as: rhs OP (value read from *lhs)
// Relies on lhs, rhs and flag from the enclosing routine and returns from it:
// the stored value when flag is non-zero, the previously read value otherwise.
// Note: *lhs is read through the volatile 'snapshot' in order to force the
//       compiler to read *lhs only once per attempt (w/o it the compiler
//       reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP)                                  \
    {                                                                     \
        TYPE KMP_ATOMIC_VOLATILE snapshot;                                \
        TYPE expected, desired;                                           \
        for ( ;; ) {                                                      \
            snapshot = *lhs;                                              \
            expected = snapshot;                                          \
            desired  = rhs OP expected;                                   \
            if ( KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs,  \
                      *VOLATILE_CAST(kmp_int##BITS *) &expected,          \
                      *VOLATILE_CAST(kmp_int##BITS *) &desired ) )        \
                break;                                                    \
                                                                          \
            KMP_CPU_PAUSE();           /* lost the race: back off */      \
        }                                                                 \
        if( flag ) {                                                      \
            return desired;                                               \
        }                                                                 \
        return expected;                                                  \
    }
2302 
// -------------------------------------------------------------------------
// Generates one reversed-operand capture routine built on compare-and-store.
//     TYPE_ID, OP_ID, TYPE - forwarded to ATOMIC_BEGIN_CPT, which supplies
//                            the function header
//     BITS      - operand size in bits (selects the compare-and-store call)
//     OP        - binary operator
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_CPT_REV
// new_value is declared for the GOMP critical-section path; the
// compare-and-store path declares its own temporaries.  The closing brace
// ends the function opened by ATOMIC_BEGIN_CPT.
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)      \
ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                 \
    TYPE new_value;                                                       \
    OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG)                                \
    OP_CMPXCHG_CPT_REV(TYPE,BITS,OP)                                      \
}
2311 
2312 
2313 ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
2314 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
2315 ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
2316 ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
2317 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
2318 ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
2319 ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
2320 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
2321 ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
2322 ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
2323 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
2324 ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
2325 ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
2326 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
2327 ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
2328 ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
2329 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
2330 ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
2331 ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
2332 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
2333 ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
2334 ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
2335 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
2336 ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
2337 ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
2338 ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
2339 ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
2340 ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
2341 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2342 
2343 
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
//     TYPE_ID, OP_ID, TYPE - forwarded to ATOMIC_BEGIN_CPT, which supplies
//                            the function header
//     OP        - binary operator, applied as (rhs) OP (*lhs)
//     LCK_ID    - lock identifier, used to possibly distinguish lock variable
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_CPT_REV
// new_value receives the captured value inside OP_CRITICAL_CPT_REV, which
// also returns it.  The closing brace ends the function opened by
// ATOMIC_BEGIN_CPT.
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)   \
ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                 \
    TYPE new_value;                                                       \
    OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG)                                \
    OP_CRITICAL_CPT_REV(OP,LCK_ID)                                        \
}
2357 
2358 
2359 /* ------------------------------------------------------------------------- */
2360 // routines for long double type
2361 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
2362 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
2363 // routines for _Quad type
2364 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
2365 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
2366 #if ( KMP_ARCH_X86 )
2367  ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
2368  ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
2369 #endif
2370 
// routines for complex types

// ------------------------------------------------------------------------

// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
//     OP     - binary operator, applied as (rhs) OP (*lhs)
//     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to pick the lock
// Relies on gtid, flag, lhs, rhs and out from the enclosing routine.
// A non-zero flag stores the updated value into *out, a zero flag stores
// the value seen before the update.  Returns (void) from the routine.
#define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID)                                \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
                                                                          \
    if( ! flag ) {                                                        \
        (*out) = (*lhs);               /* capture, then update */         \
        (*lhs) = (rhs) OP (*lhs);                                         \
    } else {                                                              \
        (*lhs) = (rhs) OP (*lhs);      /* update, then capture */         \
        (*out) = (*lhs);                                                  \
    }                                                                     \
                                                                          \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
    return;
2390 // ------------------------------------------------------------------------
2391 
// GOMP compatibility hook for the cmplx4 workaround routines.
// Without KMP_GOMP_COMPAT it expands to nothing.  With it, when FLAG is
// non-zero and __kmp_atomic_mode == 2, the operation is performed through
// OP_CRITICAL_CPT_REV_WRK with lock id 0 (which returns from the routine).
#ifndef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)                             \
    if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
        KMP_CHECK_GTID;                                                   \
        OP_CRITICAL_CPT_REV_WRK( OP, 0 );                                 \
    }
#endif /* KMP_GOMP_COMPAT */
2401 // ------------------------------------------------------------------------
2402 
// Generates one reversed-operand capture routine that hands the captured
// value back through *out.  ATOMIC_BEGIN_WRK supplies the function header;
// the closing brace here ends that function.
#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE)                                        \
    OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG)                              \
    OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID)                                      \
}
// The end of workaround for cmplx4
2409 
2410 
2411 // !!! TODO: check if we need to return void for cmplx4 routines
2412 // cmplx4 routines to return void
2413 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
2414 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
2415 
2416 ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
2417 ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
2418 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
2419 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
2420 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
2421 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
2422 #if ( KMP_ARCH_X86 )
2423  ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
2424  ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
2425 #endif
2426 
//   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

// Opens the definition of __kmpc_atomic_<TYPE_ID>_swp, which takes
// ( id_ref, gtid, lhs, rhs ) and returns TYPE.  The function body is left
// open after the debug assertion and the trace call; the macro that uses
// this one provides the rest of the body and the closing brace.
#define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                                    \
TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs )     \
{                                                                                         \
    KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
    KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2434 
// Swap under a critical section: saves *lhs into old_value, stores rhs into
// *lhs and returns old_value from the enclosing routine.
//     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to pick the lock
// Relies on gtid, lhs, rhs and old_value from the enclosing routine.
#define CRITICAL_SWP(LCK_ID)                                              \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
                                                                          \
    old_value = (*lhs);                /* value handed back to caller */  \
    (*lhs) = rhs;                                                         \
                                                                          \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
    return old_value;
2443 
// ------------------------------------------------------------------------
// GOMP compatibility hook for the swap routines.
// Without KMP_GOMP_COMPAT it expands to nothing.  With it, when FLAG is
// non-zero and __kmp_atomic_mode == 2, the swap is performed through
// CRITICAL_SWP with lock id 0 (which returns from the routine).
#ifndef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG)
#else
#define GOMP_CRITICAL_SWP(FLAG)                                           \
    if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
        KMP_CHECK_GTID;                                                   \
        CRITICAL_SWP( 0 );                                                \
    }
#endif /* KMP_GOMP_COMPAT */
2454 
2455 
// Generates an integer swap routine on top of __kmp_xchg_fixed<BITS>.
//     TYPE_ID, TYPE - forwarded to ATOMIC_BEGIN_SWP (function header)
//     BITS          - operand size in bits, selects the exchange primitive
//     GOMP_FLAG     - forwarded to GOMP_CRITICAL_SWP
// old_value is shared with the GOMP critical-section path.
#define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG)                      \
ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
    TYPE old_value;                                                       \
    GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
    old_value = __kmp_xchg_fixed##BITS( lhs, rhs );                       \
    return old_value;                                                     \
}
2463 // ------------------------------------------------------------------------
// Generates a floating-point swap routine on top of __kmp_xchg_real<BITS>.
//     TYPE_ID, TYPE - forwarded to ATOMIC_BEGIN_SWP (function header)
//     BITS          - operand size in bits, selects the exchange primitive
//     GOMP_FLAG     - forwarded to GOMP_CRITICAL_SWP
// old_value is shared with the GOMP critical-section path.
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG)                \
ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
    TYPE old_value;                                                       \
    GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
    old_value = __kmp_xchg_real##BITS( lhs, rhs );                        \
    return old_value;                                                     \
}
2471 
// ------------------------------------------------------------------------
// Swap implemented with a "compare_and_store" retry loop.
//     TYPE - operands' type
//     BITS - size in bits, selects the low level compare-and-store call
// Relies on lhs and rhs from the enclosing routine; returns from it with
// the value of *lhs that was replaced by rhs.
#define CMPXCHG_SWP(TYPE,BITS)                                            \
    {                                                                     \
        TYPE KMP_ATOMIC_VOLATILE snapshot;                                \
        TYPE expected, desired;                                           \
        for ( ;; ) {                                                      \
            snapshot = *lhs;                                              \
            expected = snapshot;                                          \
            desired  = rhs;                                               \
            if ( KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs,  \
                      *VOLATILE_CAST(kmp_int##BITS *) &expected,          \
                      *VOLATILE_CAST(kmp_int##BITS *) &desired ) )        \
                break;                                                    \
                                                                          \
            KMP_CPU_PAUSE();           /* lost the race: back off */      \
        }                                                                 \
        return expected;                                                  \
    }
2492 
// -------------------------------------------------------------------------
// Generates a swap routine built on the compare-and-store loop.
//     TYPE_ID, TYPE - forwarded to ATOMIC_BEGIN_SWP (function header)
//     BITS          - operand size in bits
//     GOMP_FLAG     - forwarded to GOMP_CRITICAL_SWP
// old_value is declared for the GOMP critical-section path; CMPXCHG_SWP
// works with temporaries of its own.
#define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG)                   \
ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
    TYPE old_value;                                                       \
    GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
    CMPXCHG_SWP(TYPE,BITS)                                                \
}
2500 
2501 ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
2502 ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
2503 ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
2504 
2505 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
2506 
2507 #if ( KMP_ARCH_X86 )
2508  ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2509  ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2510 #else
2511  ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2512  ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2513 #endif
2514 
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
//     TYPE_ID, TYPE - forwarded to ATOMIC_BEGIN_SWP (function header)
//     LCK_ID        - lock identifier handed to CRITICAL_SWP
//     GOMP_FLAG     - forwarded to GOMP_CRITICAL_SWP
// old_value is filled in and returned by CRITICAL_SWP.
#define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG)                \
ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
    TYPE old_value;                                                       \
    GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
    CRITICAL_SWP(LCK_ID)                                                  \
}
2523 
2524 // ------------------------------------------------------------------------
2525 
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

// Opens the definition of the void flavour of __kmpc_atomic_<TYPE_ID>_swp,
// which takes ( id_ref, gtid, lhs, rhs, out ).  The function body is left
// open after the debug assertion and the trace call; the macro that uses
// this one provides the rest of the body and the closing brace.
#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE)                                                          \
void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out )   \
{                                                                                                   \
    KMP_DEBUG_ASSERT( __kmp_init_serial );                                                          \
    KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2535 
2536 
// Swap under a critical section for the void (workaround) routines: the
// previous value of *lhs is saved in tmp, rhs is stored into *lhs, and the
// saved value is then written to *out.
//     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to pick the lock
// Relies on gtid, lhs, rhs, out and tmp from the enclosing routine.
#define CRITICAL_SWP_WRK(LCK_ID)                                          \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
                                                                          \
    tmp = (*lhs);                      /* remember the old value */       \
    (*lhs) = (rhs);                                                       \
    (*out) = tmp;                      /* hand it back via out */         \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
    return;
2545 
2546 // ------------------------------------------------------------------------
2547 
// GOMP compatibility hook for the void (workaround) swap routines.
// Without KMP_GOMP_COMPAT it expands to nothing.  With it, when FLAG is
// non-zero and __kmp_atomic_mode == 2, the swap is performed through
// CRITICAL_SWP_WRK with lock id 0 (which returns from the routine).
#ifndef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)                                       \
    if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
        KMP_CHECK_GTID;                                                   \
        CRITICAL_SWP_WRK( 0 );                                            \
    }
#endif /* KMP_GOMP_COMPAT */
2557 // ------------------------------------------------------------------------
2558 
// Generates a swap routine that returns void and hands the previous value
// back through *out.
//     TYPE_ID, TYPE - forwarded to ATOMIC_BEGIN_SWP_WRK (function header)
//     LCK_ID        - lock identifier handed to CRITICAL_SWP_WRK
//     GOMP_FLAG     - forwarded to GOMP_CRITICAL_SWP_WRK
// tmp is the temporary used by CRITICAL_SWP_WRK.
#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG)           \
ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE)                                        \
    TYPE tmp;                                                             \
    GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                      \
    CRITICAL_SWP_WRK(LCK_ID)                                              \
}
// The end of workaround for cmplx4
2566 
2567 
2568 ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
2569 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
2570 // cmplx4 routine to return void
2571 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2572 
2573 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2574 
2575 
2576 ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
2577 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
2578 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
2579 #if ( KMP_ARCH_X86 )
2580  ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
2581  ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
2582 #endif
2583 
2584 
2585 // End of OpenMP 4.0 Capture
2586 
2587 #endif //OMP_40_ENABLED
2588 
2589 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
2590 
2591 
2592 #undef OP_CRITICAL
2593 
2594 /* ------------------------------------------------------------------------ */
2595 /* Generic atomic routines */
2596 /* ------------------------------------------------------------------------ */
2597 
2598 void
2599 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2600 {
2601  KMP_DEBUG_ASSERT( __kmp_init_serial );
2602 
2603  if (
2604 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2605  FALSE /* must use lock */
2606 #else
2607  TRUE
2608 #endif
2609  )
2610  {
2611  kmp_int8 old_value, new_value;
2612 
2613  old_value = *(kmp_int8 *) lhs;
2614  (*f)( &new_value, &old_value, rhs );
2615 
2616  /* TODO: Should this be acquire or release? */
2617  while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
2618  *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
2619  {
2620  KMP_CPU_PAUSE();
2621 
2622  old_value = *(kmp_int8 *) lhs;
2623  (*f)( &new_value, &old_value, rhs );
2624  }
2625 
2626  return;
2627  }
2628  else {
2629  //
2630  // All 1-byte data is of integer data type.
2631  //
2632 
2633 #ifdef KMP_GOMP_COMPAT
2634  if ( __kmp_atomic_mode == 2 ) {
2635  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2636  }
2637  else
2638 #endif /* KMP_GOMP_COMPAT */
2639  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2640 
2641  (*f)( lhs, lhs, rhs );
2642 
2643 #ifdef KMP_GOMP_COMPAT
2644  if ( __kmp_atomic_mode == 2 ) {
2645  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2646  }
2647  else
2648 #endif /* KMP_GOMP_COMPAT */
2649  __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2650  }
2651 }
2652 
2653 void
2654 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2655 {
2656  if (
2657 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2658  FALSE /* must use lock */
2659 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2660  TRUE /* no alignment problems */
2661 #else
2662  ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
2663 #endif
2664  )
2665  {
2666  kmp_int16 old_value, new_value;
2667 
2668  old_value = *(kmp_int16 *) lhs;
2669  (*f)( &new_value, &old_value, rhs );
2670 
2671  /* TODO: Should this be acquire or release? */
2672  while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
2673  *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
2674  {
2675  KMP_CPU_PAUSE();
2676 
2677  old_value = *(kmp_int16 *) lhs;
2678  (*f)( &new_value, &old_value, rhs );
2679  }
2680 
2681  return;
2682  }
2683  else {
2684  //
2685  // All 2-byte data is of integer data type.
2686  //
2687 
2688 #ifdef KMP_GOMP_COMPAT
2689  if ( __kmp_atomic_mode == 2 ) {
2690  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2691  }
2692  else
2693 #endif /* KMP_GOMP_COMPAT */
2694  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2695 
2696  (*f)( lhs, lhs, rhs );
2697 
2698 #ifdef KMP_GOMP_COMPAT
2699  if ( __kmp_atomic_mode == 2 ) {
2700  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2701  }
2702  else
2703 #endif /* KMP_GOMP_COMPAT */
2704  __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2705  }
2706 }
2707 
2708 void
2709 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2710 {
2711  KMP_DEBUG_ASSERT( __kmp_init_serial );
2712 
2713  if (
2714  //
2715  // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
2716  // Gomp compatibility is broken if this routine is called for floats.
2717  //
2718 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2719  TRUE /* no alignment problems */
2720 #else
2721  ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
2722 #endif
2723  )
2724  {
2725  kmp_int32 old_value, new_value;
2726 
2727  old_value = *(kmp_int32 *) lhs;
2728  (*f)( &new_value, &old_value, rhs );
2729 
2730  /* TODO: Should this be acquire or release? */
2731  while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
2732  *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
2733  {
2734  KMP_CPU_PAUSE();
2735 
2736  old_value = *(kmp_int32 *) lhs;
2737  (*f)( &new_value, &old_value, rhs );
2738  }
2739 
2740  return;
2741  }
2742  else {
2743  //
2744  // Use __kmp_atomic_lock_4i for all 4-byte data,
2745  // even if it isn't of integer data type.
2746  //
2747 
2748 #ifdef KMP_GOMP_COMPAT
2749  if ( __kmp_atomic_mode == 2 ) {
2750  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2751  }
2752  else
2753 #endif /* KMP_GOMP_COMPAT */
2754  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2755 
2756  (*f)( lhs, lhs, rhs );
2757 
2758 #ifdef KMP_GOMP_COMPAT
2759  if ( __kmp_atomic_mode == 2 ) {
2760  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2761  }
2762  else
2763 #endif /* KMP_GOMP_COMPAT */
2764  __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2765  }
2766 }
2767 
2768 void
2769 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2770 {
2771  KMP_DEBUG_ASSERT( __kmp_init_serial );
2772  if (
2773 
2774 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2775  FALSE /* must use lock */
2776 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2777  TRUE /* no alignment problems */
2778 #else
2779  ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
2780 #endif
2781  )
2782  {
2783  kmp_int64 old_value, new_value;
2784 
2785  old_value = *(kmp_int64 *) lhs;
2786  (*f)( &new_value, &old_value, rhs );
2787  /* TODO: Should this be acquire or release? */
2788  while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
2789  *(kmp_int64 *) &old_value,
2790  *(kmp_int64 *) &new_value ) )
2791  {
2792  KMP_CPU_PAUSE();
2793 
2794  old_value = *(kmp_int64 *) lhs;
2795  (*f)( &new_value, &old_value, rhs );
2796  }
2797 
2798  return;
2799  } else {
2800  //
2801  // Use __kmp_atomic_lock_8i for all 8-byte data,
2802  // even if it isn't of integer data type.
2803  //
2804 
2805 #ifdef KMP_GOMP_COMPAT
2806  if ( __kmp_atomic_mode == 2 ) {
2807  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2808  }
2809  else
2810 #endif /* KMP_GOMP_COMPAT */
2811  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2812 
2813  (*f)( lhs, lhs, rhs );
2814 
2815 #ifdef KMP_GOMP_COMPAT
2816  if ( __kmp_atomic_mode == 2 ) {
2817  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2818  }
2819  else
2820 #endif /* KMP_GOMP_COMPAT */
2821  __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2822  }
2823 }
2824 
2825 void
2826 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2827 {
2828  KMP_DEBUG_ASSERT( __kmp_init_serial );
2829 
2830 #ifdef KMP_GOMP_COMPAT
2831  if ( __kmp_atomic_mode == 2 ) {
2832  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2833  }
2834  else
2835 #endif /* KMP_GOMP_COMPAT */
2836  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2837 
2838  (*f)( lhs, lhs, rhs );
2839 
2840 #ifdef KMP_GOMP_COMPAT
2841  if ( __kmp_atomic_mode == 2 ) {
2842  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2843  }
2844  else
2845 #endif /* KMP_GOMP_COMPAT */
2846  __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2847 }
2848 
2849 void
2850 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2851 {
2852  KMP_DEBUG_ASSERT( __kmp_init_serial );
2853 
2854 #ifdef KMP_GOMP_COMPAT
2855  if ( __kmp_atomic_mode == 2 ) {
2856  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2857  }
2858  else
2859 #endif /* KMP_GOMP_COMPAT */
2860  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2861 
2862  (*f)( lhs, lhs, rhs );
2863 
2864 #ifdef KMP_GOMP_COMPAT
2865  if ( __kmp_atomic_mode == 2 ) {
2866  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2867  }
2868  else
2869 #endif /* KMP_GOMP_COMPAT */
2870  __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2871 }
2872 
2873 void
2874 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2875 {
2876  KMP_DEBUG_ASSERT( __kmp_init_serial );
2877 
2878 #ifdef KMP_GOMP_COMPAT
2879  if ( __kmp_atomic_mode == 2 ) {
2880  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2881  }
2882  else
2883 #endif /* KMP_GOMP_COMPAT */
2884  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2885 
2886  (*f)( lhs, lhs, rhs );
2887 
2888 #ifdef KMP_GOMP_COMPAT
2889  if ( __kmp_atomic_mode == 2 ) {
2890  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2891  }
2892  else
2893 #endif /* KMP_GOMP_COMPAT */
2894  __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2895 }
2896 
2897 void
2898 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2899 {
2900  KMP_DEBUG_ASSERT( __kmp_init_serial );
2901 
2902 #ifdef KMP_GOMP_COMPAT
2903  if ( __kmp_atomic_mode == 2 ) {
2904  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2905  }
2906  else
2907 #endif /* KMP_GOMP_COMPAT */
2908  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2909 
2910  (*f)( lhs, lhs, rhs );
2911 
2912 #ifdef KMP_GOMP_COMPAT
2913  if ( __kmp_atomic_mode == 2 ) {
2914  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2915  }
2916  else
2917 #endif /* KMP_GOMP_COMPAT */
2918  __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2919 }
2920 
2921 // AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler
2922 // duplicated in order to not use 3-party names in pure Intel code
2923 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
2924 void
2925 __kmpc_atomic_start(void)
2926 {
2927  int gtid = __kmp_entry_gtid();
2928  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
2929  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
2930 }
2931 
2932 
2933 void
2934 __kmpc_atomic_end(void)
2935 {
2936  int gtid = __kmp_get_gtid();
2937  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
2938  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
2939 }
2940 
2941 /* ------------------------------------------------------------------------ */
2942 /* ------------------------------------------------------------------------ */
2947 // end of file