Description: workaround X32 relocation issues
 A proposal for a workaround for the x32 relocation problem.
Author: https://github.com/czurnieden
Origin: https://github.com/libtom/libtommath/pull/155
--- a/bncore.c
+++ b/bncore.c
@@ -26,6 +26,34 @@
 
         TOOM_MUL_CUTOFF      = 350,      /* no optimal values of these are known yet so set em high */
         TOOM_SQR_CUTOFF      = 400;
+
+#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS)) /* function access to the cut-off variables defined above */
+int mp_get_KARATSUBA_MUL_CUTOFF(void){ /* each getter returns the current value of its variable */
+   return KARATSUBA_MUL_CUTOFF;
+}
+void mp_set_KARATSUBA_MUL_CUTOFF(int cutoff){ /* each setter stores cutoff unchecked */
+   KARATSUBA_MUL_CUTOFF = cutoff;
+}
+int mp_get_KARATSUBA_SQR_CUTOFF(void){
+   return KARATSUBA_SQR_CUTOFF;
+}
+void mp_set_KARATSUBA_SQR_CUTOFF(int cutoff){
+   KARATSUBA_SQR_CUTOFF = cutoff;
+}
+int mp_get_TOOM_MUL_CUTOFF(void){
+   return TOOM_MUL_CUTOFF;
+}
+void mp_set_TOOM_MUL_CUTOFF(int cutoff){
+   TOOM_MUL_CUTOFF = cutoff;
+}
+int mp_get_TOOM_SQR_CUTOFF(void){
+   return TOOM_SQR_CUTOFF;
+}
+void mp_set_TOOM_SQR_CUTOFF(int cutoff){
+   TOOM_SQR_CUTOFF = cutoff;
+}
+#endif /* LTM_NEED_EXPLICIT_EXPORT && LTM_EXPORT_CUTOFFS */
+
 #endif
 
 /* ref:         HEAD -> master, tag: v1.1.0 */
--- a/demo/timing.c
+++ b/demo/timing.c
@@ -205,19 +205,29 @@
    FCLOSE(log);
 
    /* do mult/square twice, first without karatsuba and second with */
+#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS))
+   old_kara_m = mp_get_KARATSUBA_MUL_CUTOFF();
+   old_kara_s = mp_get_KARATSUBA_SQR_CUTOFF();
+#else
    old_kara_m = KARATSUBA_MUL_CUTOFF;
    old_kara_s = KARATSUBA_SQR_CUTOFF;
+#endif
    /* currently toom-cook cut-off is too high to kick in, so we just use the karatsuba values */
    old_toom_m = old_kara_m;
    old_toom_s = old_kara_m;
    for (ix = 0; ix < 3; ix++) {
       printf("With%s Karatsuba, With%s Toom\n", (ix == 0) ? "out" : "", (ix == 1) ? "out" : "");
-
+#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS))
+      mp_set_KARATSUBA_MUL_CUTOFF((ix == 1) ? old_kara_m : 9999);
+      mp_set_KARATSUBA_SQR_CUTOFF((ix == 1) ? old_kara_s : 9999);
+      mp_set_TOOM_MUL_CUTOFF((ix == 2) ? old_toom_m : 9999);
+      mp_set_TOOM_SQR_CUTOFF((ix == 2) ? old_toom_s : 9999);
+#else
       KARATSUBA_MUL_CUTOFF = (ix == 1) ? old_kara_m : 9999;
       KARATSUBA_SQR_CUTOFF = (ix == 1) ? old_kara_s : 9999;
       TOOM_MUL_CUTOFF = (ix == 2) ? old_toom_m : 9999;
       TOOM_SQR_CUTOFF = (ix == 2) ? old_toom_s : 9999;
-
+#endif
       log = FOPEN((ix == 0) ? "logs/mult.log" : (ix == 1) ? "logs/mult_kara.log" : "logs/mult_toom.log", "w");
       for (cnt = 4; cnt <= (10240 / DIGIT_BIT); cnt += 2) {
          SLEEP;
--- a/doc/bn.tex
+++ b/doc/bn.tex
@@ -126,6 +126,19 @@
 There is limited support for making a ``DLL'' in windows via the ``makefile.cygwin\_dll'' makefile.  It requires
 Cygwin to work with since it requires the auto-export/import functionality.  The resulting DLL and import library
 ``libtommath.dll.a'' can be used to link LibTomMath dynamically to any Windows program using Cygwin.
+\subsubsection{Shared Library for X32}
+It is possible to build a shared library for the x32 architecture with some restrictions. It needs GCC 4.8.0 or later and restricts the availability of some symbols. Those symbols are:
+\begin{itemize}
+\item[] \texttt{KARATSUBA\_MUL\_CUTOFF}
+\item[] \texttt{KARATSUBA\_SQR\_CUTOFF}
+\item[] \texttt{TOOM\_MUL\_CUTOFF}
+\item[] \texttt{TOOM\_SQR\_CUTOFF}
+\item[] \texttt{ltm\_prime\_tab}
+\item[] \texttt{mp\_s\_rmap}
+\item[] \texttt{mp\_s\_rmap\_reverse}
+\item[] \texttt{mp\_s\_rmap\_reverse\_sz}
+\end{itemize}
+There are getters and setters implemented for the four Karatsuba and Toom-Cook cut-offs if the macro \texttt{LTM\_EXPORT\_CUTOFFS} is defined. See section \ref{benchmark} for some details.
 
 \subsection{Testing}
 To build the library and the test harness type
@@ -242,6 +255,25 @@
 \end{center}
 \end{small}
 
+\subsection{Automatic Evaluation of the Cut-Off Points}\label{benchmark}
+There is a small program in \texttt{demo/timing.c} to find the ideal cut-off points for the Karatsuba and Toom-Cook algorithms. You can build it with
+\begin{alltt}
+make timing
+\end{alltt}
+and run it with
+\begin{alltt}
+./timing
+\end{alltt}
+
+The results will show up in the directory \texttt{logs}. There is a small complication on the x32 architecture if you want to benchmark a shared library, because of the peculiarities of that architecture. The small makefile \texttt{makefile.shared.timing} is included for that purpose. It will build a slightly modified version of the shared library that includes getters and setters for the otherwise hidden cut-off variables of the Karatsuba and Toom-Cook algorithms; this version is meant for the evaluation of the cut-offs only. Build it with
+\begin{alltt}
+make -f makefile.shared.timing timing
+\end{alltt}
+and run it with
+\begin{alltt}
+./timing
+\end{alltt}
+
 
 \section{Purpose of LibTomMath}
 Unlike  GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath was not written with
--- /dev/null
+++ b/makefile.shared.timing
@@ -0,0 +1,71 @@
+#Makefile for GCC: timing-only build of the shared library (compiles with -DLTM_EXPORT_CUTOFFS)
+#
+#Tom St Denis
+
+#default files to install
+ifndef LIBNAME
+   LIBNAME=libtommath.la
+endif
+
+include makefile_include.mk
+
+
+ifndef LIBTOOL
+  ifeq ($(PLATFORM), Darwin)
+    LIBTOOL:=glibtool
+  else
+    LIBTOOL:=libtool
+  endif
+endif
+LTCOMPILE = $(LIBTOOL) --mode=compile --tag=CC $(CC)
+LTLINK = $(LIBTOOL) --mode=link --tag=CC $(CC)
+
+LCOV_ARGS=--directory .libs --directory .
+
+#START_INS
+OBJECTS=bn_error.o bn_fast_mp_invmod.o bn_fast_mp_montgomery_reduce.o bn_fast_s_mp_mul_digs.o \
+bn_fast_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_mp_2expt.o bn_mp_abs.o bn_mp_add.o bn_mp_add_d.o \
+bn_mp_addmod.o bn_mp_and.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o bn_mp_cmp_d.o \
+bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o bn_mp_div.o \
+bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o bn_mp_dr_reduce.o \
+bn_mp_dr_setup.o bn_mp_exch.o bn_mp_export.o bn_mp_expt_d.o bn_mp_expt_d_ex.o bn_mp_exptmod.o \
+bn_mp_exptmod_fast.o bn_mp_exteuclid.o bn_mp_fread.o bn_mp_fwrite.o bn_mp_gcd.o bn_mp_get_bit.o \
+bn_mp_get_double.o bn_mp_get_int.o bn_mp_get_long.o bn_mp_get_long_long.o bn_mp_grow.o bn_mp_import.o \
+bn_mp_init.o bn_mp_init_copy.o bn_mp_init_multi.o bn_mp_init_set.o bn_mp_init_set_int.o bn_mp_init_size.o \
+bn_mp_invmod.o bn_mp_invmod_slow.o bn_mp_is_square.o bn_mp_jacobi.o bn_mp_karatsuba_mul.o \
+bn_mp_karatsuba_sqr.o bn_mp_kronecker.o bn_mp_lcm.o bn_mp_lshd.o bn_mp_mod.o bn_mp_mod_2d.o bn_mp_mod_d.o \
+bn_mp_montgomery_calc_normalization.o bn_mp_montgomery_reduce.o bn_mp_montgomery_setup.o bn_mp_mul.o \
+bn_mp_mul_2.o bn_mp_mul_2d.o bn_mp_mul_d.o bn_mp_mulmod.o bn_mp_n_root.o bn_mp_n_root_ex.o bn_mp_neg.o \
+bn_mp_or.o bn_mp_prime_fermat.o bn_mp_prime_frobenius_underwood.o bn_mp_prime_is_divisible.o \
+bn_mp_prime_is_prime.o bn_mp_prime_miller_rabin.o bn_mp_prime_next_prime.o \
+bn_mp_prime_rabin_miller_trials.o bn_mp_prime_random_ex.o bn_mp_prime_strong_lucas_selfridge.o \
+bn_mp_radix_size.o bn_mp_radix_smap.o bn_mp_rand.o bn_mp_read_radix.o bn_mp_read_signed_bin.o \
+bn_mp_read_unsigned_bin.o bn_mp_reduce.o bn_mp_reduce_2k.o bn_mp_reduce_2k_l.o bn_mp_reduce_2k_setup.o \
+bn_mp_reduce_2k_setup_l.o bn_mp_reduce_is_2k.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_setup.o bn_mp_rshd.o \
+bn_mp_set.o bn_mp_set_double.o bn_mp_set_int.o bn_mp_set_long.o bn_mp_set_long_long.o bn_mp_shrink.o \
+bn_mp_signed_bin_size.o bn_mp_sqr.o bn_mp_sqrmod.o bn_mp_sqrt.o bn_mp_sqrtmod_prime.o bn_mp_sub.o \
+bn_mp_sub_d.o bn_mp_submod.o bn_mp_tc_and.o bn_mp_tc_div_2d.o bn_mp_tc_or.o bn_mp_tc_xor.o \
+bn_mp_to_signed_bin.o bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin.o bn_mp_to_unsigned_bin_n.o \
+bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_toradix.o bn_mp_toradix_n.o bn_mp_unsigned_bin_size.o bn_mp_xor.o \
+bn_mp_zero.o bn_prime_tab.o bn_reverse.o bn_s_mp_add.o bn_s_mp_exptmod.o bn_s_mp_mul_digs.o \
+bn_s_mp_mul_high_digs.o bn_s_mp_sqr.o bn_s_mp_sub.o bncore.o
+
+#END_INS
+
+objs: $(OBJECTS) # convenience target: compile every library object
+
+.c.o: # compile each C file through libtool with LTM_EXPORT_CUTOFFS defined
+	$(LTCOMPILE) $(CFLAGS) -DLTM_EXPORT_CUTOFFS $(LDFLAGS) -o $@ -c $<
+
+LOBJECTS = $(OBJECTS:.o=.lo)
+
+$(LIBNAME):  $(OBJECTS) # link the libtool objects (.lo) into the library
+	$(LTLINK) $(LDFLAGS) $(LOBJECTS) -DLTM_EXPORT_CUTOFFS -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION_SO) $(LIBTOOLFLAGS)
+
+timing: $(LIBNAME) demo/timing.c # build the timing demo against the library built above
+	$(LTLINK) $(CFLAGS) $(LDFLAGS) -DLTM_EXPORT_CUTOFFS  -DTIMER demo/timing.c $(LIBNAME) -o timing
+	@echo ""
+	@echo "  This makefile builds a special dynamic library for timing"
+	@echo "  purpose only!"
+	@echo "  Please do a make clean && make -f makefile.shared to make"
+	@echo "  the production version of the dynamic LibTomMath."
--- a/tommath.h
+++ b/tommath.h
@@ -19,6 +19,35 @@
 
 #include "tommath_class.h"
 
+
+#ifdef __GNUC__
+#define LTM_GNU_VERSION (__GNUC__ * 10000 \
+                              + __GNUC_MINOR__ * 100 \
+                                 + __GNUC_PATCHLEVEL__)
+#endif
+
+/*
+   __attribute__((visibility ("hidden"))) is in GCC since 3.3.x but the exact patchlevel
+   is unknown to the author. The recommended minimum GCC version is 4.8.0 according to
+   https://sites.google.com/site/x32abi/ .
+ */
+/* TODO: __attribute__((visibility ("hidden"))) is also supported by the Intel compiler */
+#if (defined(LTM_GNU_VERSION) && (LTM_GNU_VERSION >= 40800))
+/* Workaround for x32 relocation problems: x32 is x86-64 (__x86_64__) with 32-bit pointers (__ILP32__) */
+#   if ((defined __x86_64__ ) && (defined __ILP32__))
+#      define LTM_VISIBILITY_HIDDEN __attribute__((visibility ("hidden")))
+/*
+   Tuning the cut-offs (e.g. for the Toom-Cook algorithms) needs to change these otherwise hidden
+   variables. Additionally define LTM_EXPORT_CUTOFFS to get getter/setter functions for them instead.
+ */
+#      define LTM_NEED_EXPLICIT_EXPORT
+#   else
+#      define LTM_VISIBILITY_HIDDEN
+#   endif
+#else
+#   define LTM_VISIBILITY_HIDDEN
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -127,10 +156,21 @@
 typedef int           mp_err;
 
 /* you'll have to tune these... */
-extern int KARATSUBA_MUL_CUTOFF,
-       KARATSUBA_SQR_CUTOFF,
-       TOOM_MUL_CUTOFF,
-       TOOM_SQR_CUTOFF;
+#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS)) /* accessor functions for the cut-offs declared below */
+int mp_get_KARATSUBA_MUL_CUTOFF(void);
+void mp_set_KARATSUBA_MUL_CUTOFF(int cutoff);
+int mp_get_KARATSUBA_SQR_CUTOFF(void);
+void mp_set_KARATSUBA_SQR_CUTOFF(int cutoff);
+int mp_get_TOOM_MUL_CUTOFF(void);
+void mp_set_TOOM_MUL_CUTOFF(int cutoff);
+int mp_get_TOOM_SQR_CUTOFF(void);
+void mp_set_TOOM_SQR_CUTOFF(int cutoff);
+#endif /* LTM_NEED_EXPLICIT_EXPORT && LTM_EXPORT_CUTOFFS */
+extern int LTM_VISIBILITY_HIDDEN KARATSUBA_MUL_CUTOFF; /* LTM_VISIBILITY_HIDDEN is empty unless building for x32 with GCC >= 4.8 */
+extern int LTM_VISIBILITY_HIDDEN KARATSUBA_SQR_CUTOFF;
+extern int LTM_VISIBILITY_HIDDEN TOOM_MUL_CUTOFF;
+extern int LTM_VISIBILITY_HIDDEN TOOM_SQR_CUTOFF;
+
 
 /* define this to use lower memory usage routines (exptmods mostly) */
 /* #define MP_LOW_MEM */
@@ -489,7 +529,7 @@
 #endif
 
 /* table of first PRIME_SIZE primes */
-extern const mp_digit ltm_prime_tab[PRIME_SIZE];
+extern const mp_digit LTM_VISIBILITY_HIDDEN ltm_prime_tab[PRIME_SIZE];
 
 /* result=1 if a is divisible by one of the first PRIME_SIZE primes */
 int mp_prime_is_divisible(const mp_int *a, int *result);
--- a/tommath_private.h
+++ b/tommath_private.h
@@ -72,9 +72,9 @@
 int s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode);
 void bn_reverse(unsigned char *s, int len);
 
-extern const char *const mp_s_rmap;
-extern const uint8_t mp_s_rmap_reverse[];
-extern const size_t mp_s_rmap_reverse_sz;
+extern const char LTM_VISIBILITY_HIDDEN *const mp_s_rmap;
+extern const uint8_t LTM_VISIBILITY_HIDDEN mp_s_rmap_reverse[];
+extern const size_t LTM_VISIBILITY_HIDDEN mp_s_rmap_reverse_sz;
 
 /* Fancy macro to set an MPI from another type.
  * There are several things assumed:
