[147610] trunk/dports/science/lal
ram at macports.org
ram at macports.org
Sat Apr 9 14:18:47 PDT 2016
Revision: 147610
https://trac.macports.org/changeset/147610
Author: ram at macports.org
Date: 2016-04-09 14:18:47 -0700 (Sat, 09 Apr 2016)
Log Message:
-----------
science/lal: fix build with newer Xcode, fixes #51096
Modified Paths:
--------------
trunk/dports/science/lal/Portfile
Added Paths:
-----------
trunk/dports/science/lal/files/
trunk/dports/science/lal/files/9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff
Modified: trunk/dports/science/lal/Portfile
===================================================================
--- trunk/dports/science/lal/Portfile 2016-04-09 20:33:34 UTC (rev 147609)
+++ trunk/dports/science/lal/Portfile 2016-04-09 21:18:47 UTC (rev 147610)
@@ -4,6 +4,7 @@
name lal
version 6.15.2
+revision 1
categories science
platforms darwin
maintainers ram
@@ -20,6 +21,9 @@
checksums rmd160 ebe0e18eab2edfdf6b77d8bf4d5b02e1b43d93d3 \
sha256 769722c047992eba2e5b0bebb182cb9a99584fac91def3009fc9676c4e670700
+patchfiles 9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff
+patch.pre_args -p2
+
depends_build-append port:pkgconfig
depends_lib port:python27 \
Added: trunk/dports/science/lal/files/9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff
===================================================================
--- trunk/dports/science/lal/files/9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff (rev 0)
+++ trunk/dports/science/lal/files/9f725309cc54c73ed03e5bfe2502e89217ee7dae.diff 2016-04-09 21:18:47 UTC (rev 147610)
@@ -0,0 +1,172 @@
+From 9f725309cc54c73ed03e5bfe2502e89217ee7dae Mon Sep 17 00:00:00 2001
+From: Karl Wette <karl.wette at ligo.org>
+Date: Thu, 24 Mar 2016 01:45:37 +0100
+Subject: VectorMath: rename local replacements for AVX2 intrinsics
+
+- On some systems (e.g. Macs, of course) avx2intrin.h is always included
+ by immintrin.h regardless of __AVX2__, i.e. one cannot assume that the
+ AVX2 intrinsics are undefined when __AVX2__ is not defined.
+- Work around this by renaming the local replacements for AVX2 intrinsics,
+ prefixing them with "_mathfun". When __AVX2__ is defined, these names
+ are #defined to the system-supplied AVX2 intrinsics; otherwise
+ they are provided by local non-AVX2 implementations.
+---
+ lal/src/vectorops/VectorMath_avx_mathfun.h | 49 ++++++++++++++++++------------
+ 1 file changed, 30 insertions(+), 19 deletions(-)
+
+diff --git a/lal/src/vectorops/VectorMath_avx_mathfun.h b/lal/src/vectorops/VectorMath_avx_mathfun.h
+index 0563440..7958ac1 100644
+--- a/lal/src/vectorops/VectorMath_avx_mathfun.h
++++ b/lal/src/vectorops/VectorMath_avx_mathfun.h
+@@ -116,7 +116,18 @@ _PS256_CONST(cephes_log_p8, + 3.3333331174E-1);
+ _PS256_CONST(cephes_log_q1, -2.12194440e-4);
+ _PS256_CONST(cephes_log_q2, 0.693359375);
+
+-#ifndef __AVX2__
++#ifdef __AVX2__
++
++#define _mathfun_mm256_slli_epi32 _mm256_slli_epi32
++#define _mathfun_mm256_srli_epi32 _mm256_srli_epi32
++
++#define _mathfun_mm256_and_si128 _mm256_and_si128
++#define _mathfun_mm256_andnot_si128 _mm256_andnot_si128
++#define _mathfun_mm256_cmpeq_epi32 _mm256_cmpeq_epi32
++#define _mathfun_mm256_sub_epi32 _mm256_sub_epi32
++#define _mathfun_mm256_add_epi32 _mm256_add_epi32
++
++#else
+
+ typedef union imm_xmm_union {
+ v8si imm;
+@@ -138,7 +149,7 @@ typedef union imm_xmm_union {
+
+
+ #define AVX2_BITOP_USING_SSE2(fn) \
+-static inline v8si _mm256_##fn(v8si x, int a) \
++static inline v8si _mathfun_mm256_##fn(v8si x, int a) \
+ { \
+ /* use SSE2 instruction to perform the bitop AVX2 */ \
+ v4sii x1, x2; \
+@@ -154,7 +165,7 @@ AVX2_BITOP_USING_SSE2(slli_epi32)
+ AVX2_BITOP_USING_SSE2(srli_epi32)
+
+ #define AVX2_INTOP_USING_SSE2(fn) \
+-static inline v8si _mm256_##fn(v8si x, v8si y) \
++static inline v8si _mathfun_mm256_##fn(v8si x, v8si y) \
+ { \
+ /* use SSE2 instructions to perform the AVX2 integer operation */ \
+ v4sii x1, x2; \
+@@ -190,14 +201,14 @@ v8sf log256_ps(v8sf x) {
+ x = _mm256_max_ps(x, _ps256_min_norm_pos.v); /* cut off denormalized stuff */
+
+ // can be done with AVX2
+- imm0 = _mm256_srli_epi32(_mm256_castps_si256(x), 23);
++ imm0 = _mathfun_mm256_srli_epi32(_mm256_castps_si256(x), 23);
+
+ /* keep only the fractional part */
+ x = _mm256_and_ps(x, _ps256_inv_mant_mask.v);
+ x = _mm256_or_ps(x, _ps256_0p5.v);
+
+ // this is again another AVX2 instruction
+- imm0 = _mm256_sub_epi32(imm0, _pi32_256_0x7f.vi);
++ imm0 = _mathfun_mm256_sub_epi32(imm0, _pi32_256_0x7f.vi);
+ v8sf e = _mm256_cvtepi32_ps(imm0);
+
+ e = _mm256_add_ps(e, one);
+@@ -315,8 +326,8 @@ v8sf exp256_ps(v8sf x) {
+ /* build 2^n */
+ imm0 = _mm256_cvttps_epi32(fx);
+ // another two AVX2 instructions
+- imm0 = _mm256_add_epi32(imm0, _pi32_256_0x7f.vi);
+- imm0 = _mm256_slli_epi32(imm0, 23);
++ imm0 = _mathfun_mm256_add_epi32(imm0, _pi32_256_0x7f.vi);
++ imm0 = _mathfun_mm256_slli_epi32(imm0, 23);
+ v8sf pow2n = _mm256_castsi256_ps(imm0);
+ y = _mm256_mul_ps(y, pow2n);
+ return y;
+@@ -375,13 +386,13 @@ v8sf sin256_ps(v8sf x) { // any x
+ imm2 = _mm256_cvttps_epi32(y);
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ // another two AVX2 instruction
+- imm2 = _mm256_add_epi32(imm2, _pi32_256_1.vi);
++ imm2 = _mathfun_mm256_add_epi32(imm2, _pi32_256_1.vi);
+ imm2 = _mm256_and_si256(imm2, _pi32_256_inv1.vi);
+ y = _mm256_cvtepi32_ps(imm2);
+
+ /* get the swap sign flag */
+ imm0 = _mm256_and_si256(imm2, _pi32_256_4.vi);
+- imm0 = _mm256_slli_epi32(imm0, 29);
++ imm0 = _mathfun_mm256_slli_epi32(imm0, 29);
+ /* get the polynom selection mask
+ there is one polynom for 0 <= x <= Pi/4
+ and another one for Pi/4<x<=Pi/2
+@@ -389,7 +400,7 @@ v8sf sin256_ps(v8sf x) { // any x
+ Both branches will be computed.
+ */
+ imm2 = _mm256_and_si256(imm2, _pi32_256_2.vi);
+- imm2 = _mm256_cmpeq_epi32(imm2,_pi32_256_0.vi);
++ imm2 = _mathfun_mm256_cmpeq_epi32(imm2,_pi32_256_0.vi);
+ #else
+ /* we use SSE2 routines to perform the integer ops */
+ COPY_IMM_TO_XMM(_mm256_cvttps_epi32(y),imm2_1,imm2_2);
+@@ -492,17 +503,17 @@ v8sf cos256_ps(v8sf x) { // any x
+ /* store the integer part of y in mm0 */
+ imm2 = _mm256_cvttps_epi32(y);
+ /* j=(j+1) & (~1) (see the cephes sources) */
+- imm2 = _mm256_add_epi32(imm2, _pi32_256_1.vi);
++ imm2 = _mathfun_mm256_add_epi32(imm2, _pi32_256_1.vi);
+ imm2 = _mm256_and_si256(imm2, _pi32_256_inv1.vi);
+ y = _mm256_cvtepi32_ps(imm2);
+- imm2 = _mm256_sub_epi32(imm2, _pi32_256_2.vi);
++ imm2 = _mathfun_mm256_sub_epi32(imm2, _pi32_256_2.vi);
+
+ /* get the swap sign flag */
+ imm0 = _mm256_andnot_si256(imm2, _pi32_256_4.vi);
+- imm0 = _mm256_slli_epi32(imm0, 29);
++ imm0 = _mathfun_mm256_slli_epi32(imm0, 29);
+ /* get the polynom selection mask */
+ imm2 = _mm256_and_si256(imm2, _pi32_256_2.vi);
+- imm2 = _mm256_cmpeq_epi32(imm2, _pi32_256_0.vi);
++ imm2 = _mathfun_mm256_cmpeq_epi32(imm2, _pi32_256_0.vi);
+ #else
+
+ /* we use SSE2 routines to perform the integer ops */
+@@ -615,7 +626,7 @@ void sincos256_ps(v8sf x, v8sf *s, v8sf *c) {
+ imm2 = _mm256_cvttps_epi32(y);
+
+ /* j=(j+1) & (~1) (see the cephes sources) */
+- imm2 = _mm256_add_epi32(imm2, _pi32_256_1.vi);
++ imm2 = _mathfun_mm256_add_epi32(imm2, _pi32_256_1.vi);
+ imm2 = _mm256_and_si256(imm2, _pi32_256_inv1.vi);
+
+ y = _mm256_cvtepi32_ps(imm2);
+@@ -623,12 +634,12 @@ void sincos256_ps(v8sf x, v8sf *s, v8sf *c) {
+
+ /* get the swap sign flag for the sine */
+ imm0 = _mm256_and_si256(imm2, _pi32_256_4.vi);
+- imm0 = _mm256_slli_epi32(imm0, 29);
++ imm0 = _mathfun_mm256_slli_epi32(imm0, 29);
+ //v8sf swap_sign_bit_sin = _mm256_castsi256_ps(imm0);
+
+ /* get the polynom selection mask for the sine*/
+ imm2 = _mm256_and_si256(imm2, _pi32_256_2.vi);
+- imm2 = _mm256_cmpeq_epi32(imm2, _pi32_256_0.vi);
++ imm2 = _mathfun_mm256_cmpeq_epi32(imm2, _pi32_256_0.vi);
+ //v8sf poly_mask = _mm256_castsi256_ps(imm2);
+ #else
+ /* we use SSE2 routines to perform the integer ops */
+@@ -678,9 +689,9 @@ void sincos256_ps(v8sf x, v8sf *s, v8sf *c) {
+ x = _mm256_add_ps(x, xmm3);
+
+ #ifdef __AVX2__
+- imm4 = _mm256_sub_epi32(imm4, _pi32_256_2.vi);
++ imm4 = _mathfun_mm256_sub_epi32(imm4, _pi32_256_2.vi);
+ imm4 = _mm256_andnot_si256(imm4, _pi32_256_4.vi);
+- imm4 = _mm256_slli_epi32(imm4, 29);
++ imm4 = _mathfun_mm256_slli_epi32(imm4, 29);
+ #else
+ imm4_1 = _mm_sub_epi32(imm4_1, _pi32avx_2.v4i);
+ imm4_2 = _mm_sub_epi32(imm4_2, _pi32avx_2.v4i);
+--
+cgit v0.11.2
+
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.macosforge.org/pipermail/macports-changes/attachments/20160409/f583b023/attachment.html>
More information about the macports-changes mailing list