[147610] trunk/dports/science/lal

ram at macports.org
Sat Apr 9 14:18:47 PDT 2016


Revision: 147610
          https://trac.macports.org/changeset/147610
Author:   ram at macports.org
Date:     2016-04-09 14:18:47 -0700 (Sat, 09 Apr 2016)
Log Message:
-----------
science/lal: fix build with newer Xcode, fixes #51096

Modified Paths:
--------------
    trunk/dports/science/lal/Portfile

Added Paths:
-----------
    trunk/dports/science/lal/files/
    trunk/dports/science/lal/files/9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff

Modified: trunk/dports/science/lal/Portfile
===================================================================
--- trunk/dports/science/lal/Portfile	2016-04-09 20:33:34 UTC (rev 147609)
+++ trunk/dports/science/lal/Portfile	2016-04-09 21:18:47 UTC (rev 147610)
@@ -4,6 +4,7 @@
 
 name          lal
 version       6.15.2
+revision      1
 categories    science
 platforms     darwin
 maintainers   ram
@@ -20,6 +21,9 @@
 checksums     rmd160 ebe0e18eab2edfdf6b77d8bf4d5b02e1b43d93d3 \
               sha256 769722c047992eba2e5b0bebb182cb9a99584fac91def3009fc9676c4e670700
 
+patchfiles    9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff
+patch.pre_args -p2
+
 depends_build-append port:pkgconfig
 
 depends_lib   port:python27 \

Added: trunk/dports/science/lal/files/9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff
===================================================================
--- trunk/dports/science/lal/files/9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff	                        (rev 0)
+++ trunk/dports/science/lal/files/9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff	2016-04-09 21:18:47 UTC (rev 147610)
@@ -0,0 +1,172 @@
+From 9f725309cc54c73ed03e5bfe2502e89217ee7dae Mon Sep 17 00:00:00 2001
+From: Karl Wette <karl.wette at ligo.org>
+Date: Thu, 24 Mar 2016 01:45:37 +0100
+Subject: VectorMath: rename local replacements for AVX2 intrinsics
+
+- On systems (e.g. Macs, of course) avx2intrin.h is always included
+  by immintrin.h regardless of __AVX2__, i.e. one cannot assume AVX2
+  intrinsics are not defined if __AVX2__ is not defined.
+- Work around by renaming local replacements for AVX2 intrinsics
+  by prefixing with "_mathfun". When __AVX2__ is defined, these
+  are #defined to the system-supplied AVX2 intrinsics, otherwise
+  they are replaced by local non-AVX2 implementations.
+---
+ lal/src/vectorops/VectorMath_avx_mathfun.h | 49 ++++++++++++++++++------------
+ 1 file changed, 30 insertions(+), 19 deletions(-)
+
+diff --git a/lal/src/vectorops/VectorMath_avx_mathfun.h b/lal/src/vectorops/VectorMath_avx_mathfun.h
+index 0563440..7958ac1 100644
+--- a/lal/src/vectorops/VectorMath_avx_mathfun.h
++++ b/lal/src/vectorops/VectorMath_avx_mathfun.h
+@@ -116,7 +116,18 @@ _PS256_CONST(cephes_log_p8, + 3.3333331174E-1);
+ _PS256_CONST(cephes_log_q1, -2.12194440e-4);
+ _PS256_CONST(cephes_log_q2, 0.693359375);
+ 
+-#ifndef __AVX2__
++#ifdef __AVX2__
++
++#define _mathfun_mm256_slli_epi32 _mm256_slli_epi32
++#define _mathfun_mm256_srli_epi32 _mm256_srli_epi32
++
++#define _mathfun_mm256_and_si128 _mm256_and_si128
++#define _mathfun_mm256_andnot_si128 _mm256_andnot_si128
++#define _mathfun_mm256_cmpeq_epi32 _mm256_cmpeq_epi32
++#define _mathfun_mm256_sub_epi32 _mm256_sub_epi32
++#define _mathfun_mm256_add_epi32 _mm256_add_epi32
++
++#else
+ 
+ typedef union imm_xmm_union {
+   v8si imm;
+@@ -138,7 +149,7 @@ typedef union imm_xmm_union {
+ 
+ 
+ #define AVX2_BITOP_USING_SSE2(fn) \
+-static inline v8si _mm256_##fn(v8si x, int a) \
++static inline v8si _mathfun_mm256_##fn(v8si x, int a) \
+ { \
+   /* use SSE2 instruction to perform the bitop AVX2 */ \
+   v4sii x1, x2; \
+@@ -154,7 +165,7 @@ AVX2_BITOP_USING_SSE2(slli_epi32)
+ AVX2_BITOP_USING_SSE2(srli_epi32)
+ 
+ #define AVX2_INTOP_USING_SSE2(fn) \
+-static inline v8si _mm256_##fn(v8si x, v8si y) \
++static inline v8si _mathfun_mm256_##fn(v8si x, v8si y) \
+ { \
+   /* use SSE2 instructions to perform the AVX2 integer operation */ \
+   v4sii x1, x2; \
+@@ -190,14 +201,14 @@ v8sf log256_ps(v8sf x) {
+   x = _mm256_max_ps(x, _ps256_min_norm_pos.v);  /* cut off denormalized stuff */
+ 
+   // can be done with AVX2
+-  imm0 = _mm256_srli_epi32(_mm256_castps_si256(x), 23);
++  imm0 = _mathfun_mm256_srli_epi32(_mm256_castps_si256(x), 23);
+ 
+   /* keep only the fractional part */
+   x = _mm256_and_ps(x, _ps256_inv_mant_mask.v);
+   x = _mm256_or_ps(x, _ps256_0p5.v);
+ 
+   // this is again another AVX2 instruction
+-  imm0 = _mm256_sub_epi32(imm0, _pi32_256_0x7f.vi);
++  imm0 = _mathfun_mm256_sub_epi32(imm0, _pi32_256_0x7f.vi);
+   v8sf e = _mm256_cvtepi32_ps(imm0);
+ 
+   e = _mm256_add_ps(e, one);
+@@ -315,8 +326,8 @@ v8sf exp256_ps(v8sf x) {
+   /* build 2^n */
+   imm0 = _mm256_cvttps_epi32(fx);
+   // another two AVX2 instructions
+-  imm0 = _mm256_add_epi32(imm0, _pi32_256_0x7f.vi);
+-  imm0 = _mm256_slli_epi32(imm0, 23);
++  imm0 = _mathfun_mm256_add_epi32(imm0, _pi32_256_0x7f.vi);
++  imm0 = _mathfun_mm256_slli_epi32(imm0, 23);
+   v8sf pow2n = _mm256_castsi256_ps(imm0);
+   y = _mm256_mul_ps(y, pow2n);
+   return y;
+@@ -375,13 +386,13 @@ v8sf sin256_ps(v8sf x) { // any x
+   imm2 = _mm256_cvttps_epi32(y);
+   /* j=(j+1) & (~1) (see the cephes sources) */
+   // another two AVX2 instruction
+-  imm2 = _mm256_add_epi32(imm2, _pi32_256_1.vi);
++  imm2 = _mathfun_mm256_add_epi32(imm2, _pi32_256_1.vi);
+   imm2 = _mm256_and_si256(imm2, _pi32_256_inv1.vi);
+   y = _mm256_cvtepi32_ps(imm2);
+ 
+   /* get the swap sign flag */
+   imm0 = _mm256_and_si256(imm2, _pi32_256_4.vi);
+-  imm0 = _mm256_slli_epi32(imm0, 29);
++  imm0 = _mathfun_mm256_slli_epi32(imm0, 29);
+   /* get the polynom selection mask
+      there is one polynom for 0 <= x <= Pi/4
+      and another one for Pi/4<x<=Pi/2
+@@ -389,7 +400,7 @@ v8sf sin256_ps(v8sf x) { // any x
+      Both branches will be computed.
+   */
+   imm2 = _mm256_and_si256(imm2, _pi32_256_2.vi);
+-  imm2 = _mm256_cmpeq_epi32(imm2,_pi32_256_0.vi);
++  imm2 = _mathfun_mm256_cmpeq_epi32(imm2,_pi32_256_0.vi);
+ #else
+   /* we use SSE2 routines to perform the integer ops */
+   COPY_IMM_TO_XMM(_mm256_cvttps_epi32(y),imm2_1,imm2_2);
+@@ -492,17 +503,17 @@ v8sf cos256_ps(v8sf x) { // any x
+   /* store the integer part of y in mm0 */
+   imm2 = _mm256_cvttps_epi32(y);
+   /* j=(j+1) & (~1) (see the cephes sources) */
+-  imm2 = _mm256_add_epi32(imm2, _pi32_256_1.vi);
++  imm2 = _mathfun_mm256_add_epi32(imm2, _pi32_256_1.vi);
+   imm2 = _mm256_and_si256(imm2, _pi32_256_inv1.vi);
+   y = _mm256_cvtepi32_ps(imm2);
+-  imm2 = _mm256_sub_epi32(imm2, _pi32_256_2.vi);
++  imm2 = _mathfun_mm256_sub_epi32(imm2, _pi32_256_2.vi);
+ 
+   /* get the swap sign flag */
+   imm0 = _mm256_andnot_si256(imm2, _pi32_256_4.vi);
+-  imm0 = _mm256_slli_epi32(imm0, 29);
++  imm0 = _mathfun_mm256_slli_epi32(imm0, 29);
+   /* get the polynom selection mask */
+   imm2 = _mm256_and_si256(imm2, _pi32_256_2.vi);
+-  imm2 = _mm256_cmpeq_epi32(imm2, _pi32_256_0.vi);
++  imm2 = _mathfun_mm256_cmpeq_epi32(imm2, _pi32_256_0.vi);
+ #else
+ 
+   /* we use SSE2 routines to perform the integer ops */
+@@ -615,7 +626,7 @@ void sincos256_ps(v8sf x, v8sf *s, v8sf *c) {
+   imm2 = _mm256_cvttps_epi32(y);
+ 
+   /* j=(j+1) & (~1) (see the cephes sources) */
+-  imm2 = _mm256_add_epi32(imm2, _pi32_256_1.vi);
++  imm2 = _mathfun_mm256_add_epi32(imm2, _pi32_256_1.vi);
+   imm2 = _mm256_and_si256(imm2, _pi32_256_inv1.vi);
+ 
+   y = _mm256_cvtepi32_ps(imm2);
+@@ -623,12 +634,12 @@ void sincos256_ps(v8sf x, v8sf *s, v8sf *c) {
+ 
+   /* get the swap sign flag for the sine */
+   imm0 = _mm256_and_si256(imm2, _pi32_256_4.vi);
+-  imm0 = _mm256_slli_epi32(imm0, 29);
++  imm0 = _mathfun_mm256_slli_epi32(imm0, 29);
+   //v8sf swap_sign_bit_sin = _mm256_castsi256_ps(imm0);
+ 
+   /* get the polynom selection mask for the sine*/
+   imm2 = _mm256_and_si256(imm2, _pi32_256_2.vi);
+-  imm2 = _mm256_cmpeq_epi32(imm2, _pi32_256_0.vi);
++  imm2 = _mathfun_mm256_cmpeq_epi32(imm2, _pi32_256_0.vi);
+   //v8sf poly_mask = _mm256_castsi256_ps(imm2);
+ #else
+   /* we use SSE2 routines to perform the integer ops */
+@@ -678,9 +689,9 @@ void sincos256_ps(v8sf x, v8sf *s, v8sf *c) {
+   x = _mm256_add_ps(x, xmm3);
+ 
+ #ifdef __AVX2__
+-  imm4 = _mm256_sub_epi32(imm4, _pi32_256_2.vi);
++  imm4 = _mathfun_mm256_sub_epi32(imm4, _pi32_256_2.vi);
+   imm4 = _mm256_andnot_si256(imm4, _pi32_256_4.vi);
+-  imm4 = _mm256_slli_epi32(imm4, 29);
++  imm4 = _mathfun_mm256_slli_epi32(imm4, 29);
+ #else
+   imm4_1 = _mm_sub_epi32(imm4_1, _pi32avx_2.v4i);
+   imm4_2 = _mm_sub_epi32(imm4_2, _pi32avx_2.v4i);
+-- 
+cgit v0.11.2
+
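
For context, here is a minimal sketch of the conditional-rename pattern the patch introduces, assuming the translation unit is compiled with AVX enabled but without AVX2. Only one operation is shown, with illustrative code: the real header builds its non-AVX2 fallbacks from SSE2 operations on the halves of an imm_xmm_union (via the COPY_IMM_TO_XMM/COPY_XMM_TO_IMM macros visible in the hunks above) rather than the extract/insert intrinsics used here, and it covers several more operations.

#include <immintrin.h>

#ifdef __AVX2__

/* AVX2 available: the prefixed name is simply an alias for the intrinsic. */
#define _mathfun_mm256_add_epi32 _mm256_add_epi32

#else

/* No AVX2: emulate the 256-bit integer add with two SSE2 adds on the
   128-bit halves.  The distinct _mathfun_ name cannot collide with the
   _mm256_* declarations that immintrin.h exposes even when __AVX2__ is
   not defined, which is the clash the patch works around. */
static inline __m256i _mathfun_mm256_add_epi32(__m256i x, __m256i y)
{
    __m128i xlo = _mm256_castsi256_si128(x);
    __m128i xhi = _mm256_extractf128_si256(x, 1);
    __m128i ylo = _mm256_castsi256_si128(y);
    __m128i yhi = _mm256_extractf128_si256(y, 1);
    return _mm256_insertf128_si256(
        _mm256_castsi128_si256(_mm_add_epi32(xlo, ylo)),
        _mm_add_epi32(xhi, yhi), 1);
}

#endif

Call sites then use the _mathfun_ name unconditionally, as the hunks above show, so the same code path compiles whether or not __AVX2__ is defined.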