<pre style='margin:0'>
Herby Gillot (herbygillot) pushed a commit to branch master
in repository macports-ports.
</pre>
<p><a href="https://github.com/macports/macports-ports/commit/ad0b70302b368b8da1342fd586db04ffce1f6fe5">https://github.com/macports/macports-ports/commit/ad0b70302b368b8da1342fd586db04ffce1f6fe5</a></p>
<pre style="white-space: pre; background: #F8F8F8">The following commit(s) were added to refs/heads/master by this push:
<span style='display:block; white-space:pre;color:#404040;'> new ad0b70302b3 lame: add simd variant
</span>ad0b70302b3 is described below
<span style='display:block; white-space:pre;color:#808000;'>commit ad0b70302b368b8da1342fd586db04ffce1f6fe5
</span>Author: Sergey Fedorov &lt;vital.had@gmail.com&gt;
AuthorDate: Mon May 1 15:19:50 2023 +0800
<span style='display:block; white-space:pre;color:#404040;'> lame: add simd variant
</span>---
audio/lame/Portfile | 40 +-
audio/lame/files/lame-3.100-altivec-20171014.diff | 4003 +++++++++++++++++++
audio/lame/files/lame-3.100-altivec-20171217.diff | 4263 +++++++++++++++++++++
audio/lame/files/lame-3.100-neon-20230418.diff | 1593 ++++++++
audio/lame/files/lame-3.100-sse-20171014.diff | 1831 +++++++++
5 files changed, 11729 insertions(+), 1 deletion(-)
<span style='display:block; white-space:pre;color:#808080;'>diff --git a/audio/lame/Portfile b/audio/lame/Portfile
</span><span style='display:block; white-space:pre;color:#808080;'>index ec5702695d2..74e116be720 100644
</span><span style='display:block; white-space:pre;background:#e0e0ff;'>--- a/audio/lame/Portfile
</span><span style='display:block; white-space:pre;background:#e0e0ff;'>+++ b/audio/lame/Portfile
</span><span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -5,7 +5,7 @@ PortGroup legacysupport 1.0
</span>
name lame
version 3.100
<span style='display:block; white-space:pre;background:#ffe0e0;'>-revision 2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+revision 3
</span> checksums rmd160 e467c1f9458ca6878cd46e89fffce8970b9ea936 \
sha256 ddfe36cab873794038ae2c1210557ad34857a4b6bdc515785d1da9e175b1da1e \
size 1524133
<span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -36,4 +36,42 @@ patchfiles-append allow-deprecated.patch
</span>
configure.args --disable-gtktest
<span style='display:block; white-space:pre;background:#e0ffe0;'>+variant simd description "Optimize for the arch via SIMD" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+# https://tmkk.undo.jp/lame/index_e.html
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+# Updated PPC version from: https://github.com/classilla/lamevmx
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if {${build_arch} in [list arm arm64]} {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ patchfiles-append \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ lame-${version}-neon-20230418.diff
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } elseif {${build_arch} in [list i386 x86_64]} {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ patchfiles-append \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ lame-${version}-sse-20171014.diff
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } elseif {${build_arch} in [list ppc ppc64]} {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ patchfiles-append \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ lame-${version}-altivec-20171217.diff
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ post-patch {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ reinplace "s|@SYSROOT@|${developer_dir}/SDKs/MacOSX${macosx_sdk_version}.sdk|" \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ${worksrcpath}/configure
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if {${build_arch} eq "ppc64"} {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ reinplace "s|-arch ppc750 -arch ppc7400 -arch ppc970|-arch ppc970|" \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ${worksrcpath}/configure
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } elseif {${build_arch} eq "ppc"} {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # If building for ppc natively, a specific arch may be chosen here:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ reinplace "s|-arch ppc750 -arch ppc7400 -arch ppc970|-arch ppc|" \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ${worksrcpath}/configure
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if {${os.platform} eq "darwin" && ${os.major} == 10} {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # This is for Rosetta:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ configure.args-append \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ --build=powerpc-apple-darwin${os.major}.${os.minor}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # https://github.com/classilla/lamevmx/issues/1
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ configure.cflags-append -faltivec
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+if {![variant_isset universal] && ${os.arch} in [list arm powerpc i386]} {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ default_variants +simd
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span> livecheck.regex /${name}-(\\d+(?:\\.\\d+)+)${extract.suffix}
<span style='display:block; white-space:pre;color:#808080;'>diff --git a/audio/lame/files/lame-3.100-altivec-20171014.diff b/audio/lame/files/lame-3.100-altivec-20171014.diff
</span>new file mode 100644
<span style='display:block; white-space:pre;color:#808080;'>index 00000000000..42374b04459
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>--- /dev/null
</span><span style='display:block; white-space:pre;background:#e0e0ff;'>+++ b/audio/lame/files/lame-3.100-altivec-20171014.diff
</span><span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -0,0 +1,4003 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+# https://tmkk.undo.jp/lame/lame-3.100-altivec-20171014.diff
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/fft.c.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/fft.c 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -38,6 +38,12 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include &lt;config.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include &lt;altivec.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "encoder.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -66,6 +72,17 @@ fht(FLOAT * fz, int n)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT *fi, *gi;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const *fn;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float csvec[16] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vfi0,vfi1,vfi2,vfi3,vgi0,vgi1,vgi2,vgi3,vf0,vf1,vf2,vf3,vg0,vg1,vg2,vg3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vprev1,vprev2,vprev3,vprev4,vc1,vc2,vs1,vs2,vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm1,vperm2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(16,17,18,19,12,13,14,15,8,9,10,11,4,5,6,7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(16,17,18,19,4,5,6,7,8,9,10,11,12,13,14,15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ n <<= 1; /* to get BLKSIZE, because of 3DNow! ASM routine */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fn = fz + n;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -103,6 +120,238 @@ fht(FLOAT * fz, int n)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } while (fi < fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ c1 = tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ s1 = tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(kx < 4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 1; i < kx; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT c2, s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = 1 - (2 * s1) * s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = (2 * s1) * c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi = fz + i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi = fz + k1 - i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT a, b, g0, f0, f1, g1, f2, g2, f3, g3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b = s2 * fi[k1] - c2 * gi[k1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ a = c2 * fi[k1] + s2 * gi[k1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ f1 = fi[0] - a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ f0 = fi[0] + a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ g1 = gi[0] - b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ g0 = gi[0] + b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b = s2 * fi[k3] - c2 * gi[k3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ a = c2 * fi[k3] + s2 * gi[k3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ f3 = fi[k2] - a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ f2 = fi[k2] + a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ g3 = gi[k2] - b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ g2 = gi[k2] + b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b = s1 * f2 - c1 * g3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ a = c1 * f2 + s1 * g3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi[k2] = f0 - a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi[0] = f0 + a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi[k3] = g1 - b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi[k1] = g1 + b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b = c1 * g2 - s1 * f3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ a = s1 * g2 + c1 * f3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi[k2] = g0 - a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi[0] = g0 + a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi[k3] = f1 - b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi[k1] = f1 + b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (fi < fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT c2, s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(i = 1; i < 4; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = 1 - (2*s1)*s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = (2*s1)*c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[i] = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[i+4] = c2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[i+8] = s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[i+12] = s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_ld(0,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_ld(16,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = vec_ld(32,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = vec_ld(48,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi = fz;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi = fz + k1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi0 = vec_ld(0,fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi1 = vec_ld(0,fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi2 = vec_ld(0,fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi3 = vec_ld(0,fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev1 = vec_ld(0,gi-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev2 = vec_ld(0,gi+k1-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev3 = vec_ld(0,gi+k2-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev4 = vec_ld(0,gi+k3-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vec_perm(vprev1,vprev1,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vec_perm(vprev2,vprev2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vec_perm(vprev3,vprev3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vec_perm(vprev4,vprev4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vfi1,vc2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vfi1,vs2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vfi3,vc2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(vfi3,vs2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(vgi1,vs2,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_nmsub(vgi1,vc2,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(vgi3,vs2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_nmsub(vgi3,vc2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf0 = vec_add(vfi0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_sub(vfi0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg0 = vec_add(vgi0,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg1 = vec_sub(vgi0,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_add(vfi2,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sub(vfi2,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg2 = vec_add(vgi2,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg3 = vec_sub(vgi2,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vf2,vc1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vf2,vs1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vg2,vs1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(vg2,vc1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(vg3,vs1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_nmsub(vg3,vc1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(vf3,vc1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_nmsub(vf3,vs1,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_add(vf0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_sub(vf0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(vg1,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sub(vg1,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_add(vg0,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_sub(vg0,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vf1,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_sub(vf1,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,vfi0,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v10,vfi2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v15,vfi1,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v16,vfi3,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v1,0,fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v2,0,fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v3,0,fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v4,0,fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v11,vprev2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v12,vprev4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v13,vprev1,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v14,vprev3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v1,0,gi+k1-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v2,0,gi+k3-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v3,0,gi-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v4,0,gi+k2-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (fi&lt;fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* rest loop */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 4; i < kx; i+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < 4; j++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = 1 - (2*s1)*s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = (2*s1)*c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[j] = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[j+4] = c2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[j+8] = s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[j+12] = s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_ld(0,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_ld(16,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = vec_ld(32,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = vec_ld(48,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi = fz + i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi = fz + k1 - i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi0 = vec_ld(0,fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi1 = vec_ld(0,fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi2 = vec_ld(0,fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi3 = vec_ld(0,fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev1 = vec_ld(0,gi-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,gi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev2 = vec_ld(0,gi+k1-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,gi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev3 = vec_ld(0,gi+k2-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,gi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev4 = vec_ld(0,gi+k3-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,gi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vec_perm(vprev1,v1,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vec_perm(vprev2,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vec_perm(vprev3,v3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vec_perm(vprev4,v4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vfi1,vc2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vfi1,vs2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vfi3,vc2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(vfi3,vs2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(vgi1,vs2,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_nmsub(vgi1,vc2,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(vgi3,vs2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_nmsub(vgi3,vc2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf0 = vec_add(vfi0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_sub(vfi0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg0 = vec_add(vgi0,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg1 = vec_sub(vgi0,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_add(vfi2,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sub(vfi2,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg2 = vec_add(vgi2,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg3 = vec_sub(vgi2,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vf2,vc1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vf2,vs1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vg2,vs1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(vg2,vc1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(vg3,vs1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_nmsub(vg3,vc1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(vf3,vc1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_nmsub(vf3,vs1,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_add(vf0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_sub(vf0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(vg1,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sub(vg1,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_add(vg0,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_sub(vg0,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vf1,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_sub(vf1,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,0,fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v15,0,fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v16,0,fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v11,vprev2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v12,vprev4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v13,vprev1,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v14,vprev3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v1,0,gi+k1-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v11,0,gi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v2,0,gi+k3-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v12,0,gi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v3,0,gi-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v13,0,gi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v4,0,gi+k2-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v14,0,gi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (fi<fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 1; i < kx; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT c2, s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ c2 = 1 - (2 * s1) * s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -142,6 +391,7 @@ fht(FLOAT * fz, int n)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ tri += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } while (k4 < n);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/gain_analysis.c.orig 2017-10-11 04:08:39.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/gain_analysis.c 2017-10-14 18:04:59.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -92,6 +92,12 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <stdio.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <stdlib.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <string.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -109,6 +115,67 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*lint -save -e736 loss of precision */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const Float_t ABYule[9][2 * YULE_ORDER + 1 + 3] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.03857599435200, -3.84664617118067, -0.02160367184185, 7.81501653005538, -0.00123395316851,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -11.34170355132042, -0.00009291677959, 13.05504219327545, -0.01655260341619,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -12.28759895145294, 0.02161526843274, 9.48293806319790, -0.02074045215285, -5.87257861775999,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.00594298065125, 2.75465861874613, 0.00306428023191, -0.86984376593551, 0.00012025322027,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.13919314567432, 0.00288463683916, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.05418656406430, -3.47845948550071, -0.02911007808948, 6.36317777566148, -0.00848709379851,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -8.54751527471874, -0.00851165645469, 9.47693607801280, -0.00834990904936, -8.81498681370155,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.02245293253339, 6.85401540936998, -0.02596338512915, -4.39470996079559, 0.01624864962975,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 2.19611684890774, -0.00240879051584, -0.75104302451432, 0.00674613682247, 0.13149317958808,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00187763777362, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.15457299681924, -2.37898834973084, -0.09331049056315, 2.84868151156327, -0.06247880153653,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -2.64577170229825, 0.02163541888798, 2.23697657451713, -0.05588393329856, -1.67148153367602,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.04781476674921, 1.00595954808547, 0.00222312597743, -0.45953458054983, 0.03174092540049,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.16378164858596, -0.01390589421898, -0.05032077717131, 0.00651420667831, 0.02347897407020,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00881362733839, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.30296907319327, -1.61273165137247, -0.22613988682123, 1.07977492259970, -0.08587323730772,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.25656257754070, 0.03282930172664, -0.16276719120440, -0.00915702933434, -0.22638893773906,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02364141202522, 0.39120800788284, -0.00584456039913, -0.22138138954925, 0.06276101321749,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.04500235387352, -0.00000828086748, 0.02005851806501, 0.00205861885564, 0.00302439095741,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02950134983287, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.33642304856132, -1.49858979367799, -0.25572241425570, 0.87350271418188, -0.11828570177555,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.12205022308084, 0.11921148675203, -0.80774944671438, -0.07834489609479, 0.47854794562326,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00469977914380, -0.12453458140019, -0.00589500224440, -0.04067510197014, 0.05724228140351,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.08333755284107, 0.00832043980773, -0.04237348025746, -0.01635381384540, 0.02977207319925,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.01760176568150, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.44915256608450, -0.62820619233671, -0.14351757464547, 0.29661783706366, -0.22784394429749,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.37256372942400, -0.01419140100551, 0.00213767857124, 0.04078262797139, -0.42029820170918,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.12398163381748, 0.22199650564824, 0.04097565135648, 0.00613424350682, 0.10478503600251,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.06747620744683, -0.01863887810927, 0.05784820375801, -0.03193428438915, 0.03222754072173,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.00541907748707, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.56619470757641, -1.04800335126349, -0.75464456939302, 0.29156311971249, 0.16242137742230,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.26806001042947, 0.16744243493672, 0.00819999645858, -0.18901604199609, 0.45054734505008,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.30931782841830, -0.33032403314006, -0.27562961986224, 0.06739368333110, 0.00647310677246,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.04784254229033, 0.08647503780351, 0.01639907836189, -0.03788984554840, 0.01807364323573,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00588215443421, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.58100494960553, -0.51035327095184, -0.53174909058578, -0.31863563325245, -0.14289799034253,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.20256413484477, 0.17520704835522, 0.14728154134330, 0.02377945217615, 0.38952639978999,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.15558449135573, -0.23313271880868, -0.25344790059353, -0.05246019024463, 0.01628462406333,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02505961724053, 0.06920467763959, 0.02442357316099, -0.03721611395801, 0.01818801111503,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00749618797172, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.53648789255105, -0.25049871956020, -0.42163034350696, -0.43193942311114, -0.00275953611929,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.03424681017675, 0.04267842219415, -0.04678328784242, -0.10214864179676, 0.26408300200955,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.14590772289388, 0.15113130533216, -0.02459864859345, -0.17556493366449, -0.11202315195388,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.18823009262115, -0.04060034127000, 0.05477720428674, 0.04788665548180, 0.04704409688120,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02217936801134, 0.0, 0.0, 0.0}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const Float_t ABButter[9][2 * BUTTER_ORDER + 1 + 3] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.98621192462708, -1.97223372919527, -1.97242384925416, 0.97261396931306, 0.98621192462708, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.98500175787242, -1.96977855582618, -1.97000351574484, 0.97022847566350, 0.98500175787242, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97938932735214, -1.95835380975398, -1.95877865470428, 0.95920349965459, 0.97938932735214, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97531843204928, -1.95002759149878, -1.95063686409857, 0.95124613669835, 0.97531843204928, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97316523498161, -1.94561023566527, -1.94633046996323, 0.94705070426118, 0.97316523498161, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.96454515552826, -1.92783286977036, -1.92909031105652, 0.93034775234268, 0.96454515552826, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.96009142950541, -1.91858953033784, -1.92018285901082, 0.92177618768381, 0.96009142950541, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.95856916599601, -1.91542108074780, -1.91713833199203, 0.91885558323625, 0.95856916599601, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.94597685600279, -1.88903307939452, -1.89195371200558, 0.89487434461664, 0.94597685600279, 0.0, 0.0, 0.0}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const Float_t ABYule[9][multiple_of(4, 2 * YULE_ORDER + 1)] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* 20 18 16 14 12 10 8 6 4 2 0 19 17 15 13 11 9 7 5 3 1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ { 0.00288463683916, 0.00012025322027, 0.00306428023191, 0.00594298065125, -0.02074045215285, 0.02161526843274, -0.01655260341619, -0.00009291677959, -0.00123395316851, -0.02160367184185, 0.03857599435200, 0.13919314567432, -0.86984376593551, 2.75465861874613, -5.87257861775999, 9.48293806319790,-12.28759895145294, 13.05504219327545,-11.34170355132042, 7.81501653005538, -3.84664617118067},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -134,6 +201,7 @@ static const Float_t ABButter[9][multipl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0.95856916599601, 0.91885558323625, -1.91713833199203, -1.91542108074780, 0.95856916599601},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0.94597685600279, 0.89487434461664, -1.89195371200558, -1.88903307939452, 0.94597685600279}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*lint -restore */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -143,6 +211,191 @@ static const Float_t ABButter[9][multipl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* When calling this procedure, make sure that ip[-order] and op[-order] point to real data! */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++filterIntegrated (const Float_t* input, Float_t* output, Float_t* output2, size_t nSamples, const Float_t* kernel, const Float_t* kernel2)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,vbase;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vmask1,vmask2,vout1,vout2,vout3,vout4,vzero,vkernel1,vkernel2,vkernel3,vkernel4,vkernel5,vkernel6,vkernel7,vkernel8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vo1, vo2, vo3, vo4, vi2, vi3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vc1,vc2,vc3,vc4,vc5,vperm1,vperm2,vperm4,vperm5,vperm6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbase = (vector float)VINIT4ALL(1e-10f);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(24,25,26,27,16,17,18,19,8,9,10,11,0,1,2,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(28,29,30,31,20,21,22,23,12,13,14,15,4,5,6,7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_sl(vc1,vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_sl(vc3,vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_or(vc3,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector float)vec_splat_s32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_sro(v1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_sro(v1,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(48,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(64,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(80,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel1 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel2 = vec_perm(v1,v2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel3 = vec_perm(v3,v4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel4 = vec_perm(v3,v4,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel5 = vec_perm(v5,v6,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel6 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel5 = vec_and(vkernel5,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel6 = vec_and(vkernel6,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,kernel2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,kernel2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel7 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel8 = vec_perm(v1,v2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel7 = vec_and(vkernel7,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel8 = vec_and(vkernel8,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_lvsl(0,input-7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = vec_lvsl(0,output-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,input-7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,input-7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,input-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(15,input-11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi2 = vec_perm(v2,v1,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vec_perm(v3,v4,vec_lvsl(0,input-10));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vec_sro(vi3,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,output-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,output-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,output-8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,output-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(15,output-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo1 = vec_perm(v2,v1,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo2 = vec_perm(v3,v2,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vec_perm(v4,v5,vec_lvsl(0,output-10));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vec_sro(vo3,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,output2-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,output2-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo4 = vec_perm(v2,v1,vec_lvsl(0,output2-2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo4 = vec_sro(vo4,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_lvsl(0,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = vec_lvsr(0,output);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* 1st loop */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v3,v1,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(v5,vkernel1,vbase);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo1,vkernel2,vbase);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(vi2,vkernel3,vout1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo2,vkernel4,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(vi3,vkernel5,vout1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo3,vkernel6,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vec_sld(vi3,vi2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi2 = vec_sld(vi2,v5,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_sub(vout1,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_slo(vout1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_slo(vout1,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_slo(vout1,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_add(vout1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_add(vout1,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vec_sld(vo3,vo2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo2 = vec_sld(vo2,vo1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo1 = vec_sld(vo1,vout1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_perm(vout1,vout1,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vout2,0,output);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++output;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++input;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ --nSamples;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while(nSamples--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_lvsl(0,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = vec_lvsr(0,output);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm6 = vec_lvsr(0,output2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v3,v1,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(v5,vkernel1,vbase);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo1,vkernel2,vbase);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(vi2,vkernel3,vout1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo2,vkernel4,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(vi3,vkernel5,vout1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo3,vkernel6,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_nmsub(vo4,vkernel8,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_madd(vo1,vkernel7,vout3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vec_sld(vi3,vi2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi2 = vec_sld(vi2,v5,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_sub(vout1,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_slo(vout1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_slo(vout1,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_slo(vout1,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_add(vout1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_add(vout1,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vec_sld(vo3,vo2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo2 = vec_sld(vo2,vo1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo1 = vec_sld(vo1,vout1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_slo(vout4,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_slo(vout4,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_slo(vout4,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_add(vout4,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_add(v5,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_add(vout3,vout4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo4 = vec_sld(vo4,vout3,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_perm(vout1,vout1,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_perm(vout3,vout3,vperm6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vout2,0,output);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vout4,0,output2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++output;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++output2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++input;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm6 = vec_lvsr(0,output2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_nmsub(vo4,vkernel8,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_madd(vo1,vkernel7,vout3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_slo(vout4,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_slo(vout4,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_slo(vout4,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_add(vout4,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_add(vout3,vout4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_perm(vout3,vout3,vperm6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vout4,0,output2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ filterYule(const Float_t * input, Float_t * output, size_t nSamples, const Float_t * const kernel)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -189,7 +442,7 @@ filterButter(const Float_t * input, Floa
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int ResetSampleFrequency(replaygain_t * rgData, long samplefreq);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -323,6 +576,10 @@ AnalyzeSamples(replaygain_t * rgData, co
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ curright = right_samples + cursamplepos;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ filterIntegrated(curleft, rgData->lstep + rgData->totsamp, rgData->lout + rgData->totsamp, cursamples, ABYule[rgData->freqindex], ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ filterIntegrated(curright, rgData->rstep + rgData->totsamp, rgData->rout + rgData->totsamp, cursamples, ABYule[rgData->freqindex], ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ YULE_FILTER(curleft, rgData->lstep + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ABYule[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ YULE_FILTER(curright, rgData->rstep + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -332,6 +589,7 @@ AnalyzeSamples(replaygain_t * rgData, co
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ BUTTER_FILTER(rgData->rstep + rgData->totsamp, rgData->rout + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ curleft = rgData->lout + rgData->totsamp; /* Get the squared values */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ curright = rgData->rout + rgData->totsamp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/lame.c.orig 2017-10-11 04:08:39.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/lame.c 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -30,6 +30,11 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -603,7 +608,12 @@ lame_init_params(lame_global_flags * gfp
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->CPU_features.SSE = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->CPU_features.SSE2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* turn off JAVA mode explicitly */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned short vscr = vec_mfvscr();
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vscr = vec_or(vscr,(vector unsigned short)VINIT8(0,0,0,0,0,0,1,0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_mtvscr(vscr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ cfg->vbr = gfp->VBR;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ cfg->error_protection = gfp->error_protection;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/machine.h.orig 2017-10-11 04:08:39.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/machine.h 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -184,6 +184,24 @@ typedef FLOAT sample_t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT4(a,b,c,d) (a,b,c,d)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT8(a,b,c,d,e,f,g,h) (a,b,c,d,e,f,g,h)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT16(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) (a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT4ALL(a) (a,a,a,a)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT8ALL(a) (a,a,a,a,a,a,a,a)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT16ALL(a) (a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT4(a,b,c,d) {a,b,c,d}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT8(a,b,c,d,e,f,g,h) {a,b,c,d,e,f,g,h}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT16(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT4ALL(a) {a,a,a,a}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT8ALL(a) {a,a,a,a,a,a,a,a}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT16ALL(a) {a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* end of machine.h */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/newmdct.c.orig 2011-05-08 01:05:17.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/newmdct.c 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -30,6 +30,12 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "encoder.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -39,7 +45,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifndef USE_GOGO_SUBBAND
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const FLOAT enwindow[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const FLOAT enwindow[] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -4.77e-07 * 0.740951125354959 / 2.384e-06, 1.03951e-04 * 0.740951125354959 / 2.384e-06,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 9.53674e-04 * 0.740951125354959 / 2.384e-06, 2.841473e-03 * 0.740951125354959 / 2.384e-06,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 3.5758972e-02 * 0.740951125354959 / 2.384e-06, 3.401756e-03 * 0.740951125354959 / 2.384e-06, 9.83715e-04 * 0.740951125354959 / 2.384e-06, 9.9182e-05 * 0.740951125354959 / 2.384e-06, /* 15 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -230,7 +236,7 @@ static const FLOAT enwindow[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define NS 12
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define NL 36
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const FLOAT win[4][NL] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const FLOAT win[4][NL] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 2.382191739347913e-13,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 6.423305872147834e-13,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -435,6 +441,443 @@ window_subband(const sample_t * x1, FLOA
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const sample_t *x2 = &x1[238 - 14 - 286];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vw1,vw2,vw3,vw4,vw5,vw6,vw7,vw8,vs,vt,vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm2,vperm3,vperm4,vperm5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = (vector unsigned char)VINIT16(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = vec_lvsl(0,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm3 = (vector unsigned char)VINIT16(0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_lvsl(0,x1+1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_perm(vperm4,vperm4,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(i=0;i<3;i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wp-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,wp-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(16,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(32,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+26);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wp+26);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(16,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(32,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v6,v7,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v8,v9,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_mergeh(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_mergeh(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergel(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergel(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_mergel(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_mergel(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw5 = vec_mergeh(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw6 = vec_mergel(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw3 = vec_mergeh(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw7 = vec_mergeh(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw4 = vec_mergel(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw8 = vec_mergel(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-224);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw1,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+221);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+221);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw1,v6,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-160);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw2,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+157);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+157);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw2,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-96);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw3,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+93);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+93);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw3,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw4,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+29);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+29);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw4,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw5,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-35);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-35);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw5,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+96);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw6,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-99);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-99);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw6,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+160);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw7,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-163);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-163);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw7,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+224);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw8,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-227);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-227);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw8,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wp-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,wp-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(16,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(32,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wp+34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(16,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(32,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v6,v7,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v8,v9,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_mergeh(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_mergeh(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergel(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergel(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_mergel(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_mergel(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw5 = vec_mergeh(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw6 = vec_mergel(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw3 = vec_mergeh(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw7 = vec_mergeh(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw4 = vec_mergel(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw8 = vec_mergel(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+256);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw1,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-259);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-259);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw1,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+192);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw2,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-195);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-195);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw2,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+128);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw3,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-131);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-131);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw3,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+64);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw4,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-67);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-67);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw4,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw5,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw5,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-64);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw6,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+61);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+61);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw6,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-128);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw7,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+125);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+125);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw7,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-192);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw8,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+189);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+189);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw8,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*end*/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,wp+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16,wp+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v4,v5,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(0,wp+42);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,wp+60);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_ld(16,wp+60);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v10,v11,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergeh(v3,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergeh(v6,v12);;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vs,vw1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sub(vt,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(vt,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vw2,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_mergeh(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_mergel(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v4,0,a+i*8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v5,16,a+i*8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wp += 72;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1-=4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2+=4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wp-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,wp-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(16,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(32,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+26);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wp+26);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(16,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(32,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v6,v7,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v8,v9,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_mergeh(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_mergeh(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergel(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergel(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_mergel(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_mergel(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw5 = vec_mergeh(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw6 = vec_mergel(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw3 = vec_mergeh(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw7 = vec_mergeh(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw4 = vec_mergel(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw8 = vec_mergel(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-224);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw1,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+221);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+221);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw1,v6,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-160);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw2,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+157);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+157);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw2,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-96);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw3,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+93);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+93);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw3,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw4,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+29);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+29);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw4,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw5,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-35);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-35);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw5,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+96);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw6,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-99);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-99);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw6,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+160);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw7,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-163);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-163);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw7,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+224);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw8,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-227);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-227);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw8,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wp-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,wp-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(16,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(32,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wp+34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(16,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(32,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v6,v7,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v8,v9,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_mergeh(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_mergeh(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergel(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergel(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_mergel(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_mergel(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw5 = vec_mergeh(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw6 = vec_mergel(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw3 = vec_mergeh(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw7 = vec_mergeh(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw4 = vec_mergel(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw8 = vec_mergel(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+256);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw1,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-259);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-259);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw1,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+192);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw2,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-195);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-195);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw2,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+128);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw3,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-131);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-131);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw3,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+64);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw4,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-67);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-67);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw4,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw5,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw5,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-64);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw6,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+61);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+61);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw6,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-128);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw7,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+125);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+125);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw7,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-192);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw8,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+189);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+189);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw8,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*end*/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,wp+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16,wp+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v4,v5,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(0,wp+42);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,wp+60);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_ld(16,wp+60);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v10,v11,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergeh(v3,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergeh(v6,v12);;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vs,vw1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sub(vt,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(vt,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vw2,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,a+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_mergeh(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_mergel(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v6,v4,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v5,0,a+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v7,16,a+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wp += 54;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1-=3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2+=3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = -15; i &lt; 0; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT w, s, t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -501,6 +944,7 @@ window_subband(const sample_t * x1, FLOA
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x1--;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x2++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT s, t, u, v;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ t = x1[-16] * wp[-10];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/psymodel.c.orig 2017-09-07 04:38:23.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/psymodel.c 2017-10-14 18:10:00.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -146,6 +146,12 @@ blocktype_d[2] block type to use
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include &lt;float.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include &lt;altivec.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "encoder.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -164,6 +170,48 @@ blocktype_d[2] block type to use
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define LN_TO_LOG10 0.2302585093
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline vector float fast_log10_altivec_2(vector float v3)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float va,vb,vc,vhalf,vzero,vsqrt2,vconst4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v4,v5,v6,v7,v8,vz,vz2,vlog;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vconst1,vconst2,vshamt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vconst3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = (vector float)VINIT4ALL(0.8685890659);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = (vector float)VINIT4ALL(0.2894672153);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc = (vector float)VINIT4ALL(0.1793365895);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = (vector float)VINIT4ALL(0.15051499783);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(1.4142135623731);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst4 = (vector float)VINIT4ALL(0.301029995664);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst1 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(9));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_nor(vconst2,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst3 = (vector signed int)vec_rl(vconst2,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vec_splat_u32(9),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vshamt,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_sl((vector unsigned int)vconst3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector float)vec_sel(vconst2,(vector unsigned int)v3,vconst1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz = vec_madd(v6, vec_madd(vec_nmsub(v7,v5,(vector float)vconst2),v7,v7), vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = (vector float)vec_sr((vector unsigned int)v3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_ctf(vec_sub((vector signed int)v8,vconst3),0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz2 = vec_madd(vz,vz,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vlog,vconst4,vhalf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vz2,vc,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vz2,v1,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vz,v2,vlog);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return vlog;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ L3psycho_anal. Compute psycho acoustics.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -253,6 +301,11 @@ static const FLOAT ma_max_i1 = 3.6517412
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const FLOAT ma_max_i2 = 31.622776601683793;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* pow(10, (MLIMIT) / 10.0); */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const FLOAT ma_max_m = 31.622776601683793;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const vector float vmamax1 = (vector float)VINIT4ALL(3.651741);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const vector float vmamax2 = (vector float)VINIT4ALL(31.622777);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*This is the masking table:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ According to tonality, values are going from 0dB (TMN)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -666,6 +719,14 @@ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2], int chn,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int gr_out, FLOAT fftenergy[HBLKSIZE], FLOAT(*wsamp_l)[BLKSIZE])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,vhalf,vprev,vzero,vsqrt2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = vec_ctf(vec_splat_s32(1),1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(0.7071067811865001);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm = (vector unsigned char)VINIT16(0,1,2,3,28,29,30,31,24,25,26,27,20,21,22,23);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -675,19 +736,80 @@ vbrpsy_compute_fft_l(lame_internal_flags
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fft_long(gfc, *wsamp_l, chn, buffer);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else if (chn == 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* FFT data for mid and side channel is derived from L & R */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < BLKSIZE; j += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wsamp_l[0]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,wsamp_l[1]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(16,wsamp_l[0]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wsamp_l[1]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sub(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v5,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v6,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v7,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v8,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,wsamp_l[0]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,0,wsamp_l[1]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v11,16,wsamp_l[0]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v12,16,wsamp_l[1]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = BLKSIZE - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const l = wsamp_l[0][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const r = wsamp_l[1][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_l[0][j] = (l + r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_l[1][j] = (l - r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * compute energies
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vec_ld(0,(*wsamp_l));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < BLKSIZE/2; j += 16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,(*wsamp_l)+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,(*wsamp_l)+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,(*wsamp_l)+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(48,(*wsamp_l)+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(48,(*wsamp_l)+1008-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(32,(*wsamp_l)+1008-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(16,(*wsamp_l)+1008-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,(*wsamp_l)+1008-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(vprev,v5,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v5,v6,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v6,v7,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v7,v8,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(v1,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(v2,v2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(v3,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(v4,v4,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(v9,v9,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_madd(v10,v10,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(v11,v11,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v12,v12,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v5,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v6,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v7,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v8,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,16,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v11,32,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v12,48,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vprev,vprev,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v1,0,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[0] = wsamp_l[0][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[0] *= fftenergy[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -696,13 +818,51 @@ vbrpsy_compute_fft_l(lame_internal_flags
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const im = (*wsamp_l)[BLKSIZE / 2 + j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[BLKSIZE / 2 - j] = (re * re + im * im) * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* total energy */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,fftenergy+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0,fftenergy+508);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(0,fftenergy+512);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_xor(v8,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_sld(v5,v8,12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_sld(v8,v7,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,fftenergy+11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0,fftenergy+508);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_lde(0,fftenergy+512);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_xor(v8,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=12;j<508;j+=16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(48,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v1,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_add(v2,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v3,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_add(v4,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v5,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sld(v5,v5,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_sld(v5,v5,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sld(v5,v5,12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v5,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v5,vec_lvsr(0, psv->tot_ener+chn));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v5,0,psv->tot_ener+chn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT totalenergy = 0.0f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = 11; j < HBLKSIZE; j++)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ totalenergy += fftenergy[j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ psv->tot_ener[chn] = totalenergy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (plt) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -716,27 +876,96 @@ vbrpsy_compute_fft_l(lame_internal_flags
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ vbrpsy_compute_fft_s(lame_internal_flags const *gfc, const sample_t * const buffer[2], int chn,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- int sblock, FLOAT(*fftenergy_s)[HBLKSIZE_s], FLOAT(*wsamp_s)[3][BLKSIZE_s])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int sblock, FLOAT(*fftenergy_s)[HBLKSIZE_s+3], FLOAT(*wsamp_s)[3][BLKSIZE_s])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,vhalf,vprev,vzero,vsqrt2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = vec_ctf(vec_splat_s32(1),1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(0.7071067811865001);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm = (vector unsigned char)VINIT16(0,1,2,3,28,29,30,31,24,25,26,27,20,21,22,23);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (sblock == 0 && chn < 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fft_short(gfc, *wsamp_s, chn, buffer);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (chn == 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* FFT data for mid and side channel is derived from L & R */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < BLKSIZE_s; j += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wsamp_s[0][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,wsamp_s[1][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(16,wsamp_s[0][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wsamp_s[1][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sub(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v5,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v6,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v7,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v8,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,wsamp_s[0][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,0,wsamp_s[1][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v11,16,wsamp_s[0][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v12,16,wsamp_s[1][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = BLKSIZE_s - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const l = wsamp_s[0][sblock][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const r = wsamp_s[1][sblock][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_s[0][sblock][j] = (l + r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_s[1][sblock][j] = (l - r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * compute energies
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vec_ld(0,(*wsamp_s)[sblock]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < BLKSIZE_s/2; j += 16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,(*wsamp_s)[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,(*wsamp_s)[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,(*wsamp_s)[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(48,(*wsamp_s)[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(48,(*wsamp_s)[sblock]+240-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(32,(*wsamp_s)[sblock]+240-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(16,(*wsamp_s)[sblock]+240-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,(*wsamp_s)[sblock]+240-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(vprev,v5,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v5,v6,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v6,v7,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v7,v8,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(v1,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(v2,v2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(v3,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(v4,v4,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(v9,v9,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_madd(v10,v10,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(v11,v11,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v12,v12,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v5,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v6,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v7,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v8,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,16,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v11,32,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v12,48,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vprev,vprev,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v1,0,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy_s[sblock][0] = (*wsamp_s)[sblock][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy_s[sblock][0] *= fftenergy_s[sblock][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = BLKSIZE_s / 2 - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -744,6 +973,7 @@ vbrpsy_compute_fft_s(lame_internal_flags
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const im = (*wsamp_s)[sblock][BLKSIZE_s / 2 + j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy_s[sblock][BLKSIZE_s / 2 - j] = (re * re + im * im) * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -772,7 +1002,24 @@ vbrpsy_attack_detection(lame_internal_fl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT energy[4], FLOAT sub_short_factor[4][3], int ns_attacks[4][4],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int uselongblock[2])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT ns_hpfsmpl[2][576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vsum,vsum1,vsum2,vsuma,vsumb,vsumc,vsumd,vmaska,vmaskb,vmaskc,vmaskd;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask1,vmask2,vmask3,vmask4,vmask1inv,vmask2inv,vmask3inv,vmask4inv,vperm,vs4,vs8,vs12;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm = (vector unsigned char)VINIT16(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector float)vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector float)vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs4 = vec_sl((vector unsigned char)v1,(vector unsigned char)v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs8 = vec_sl(vs4,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs12 = vec_or(vs4,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector float)vec_splat_s32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmaska = vec_slo(v3,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmaskb = vec_sro(vmaska,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmaskc = vec_sro(vmaska,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmaskd = vec_sro(vmaska,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT ns_hpfsmpl[2][576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *const psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -785,14 +1032,142 @@ vbrpsy_attack_detection(lame_internal_fl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Don't copy the input buffer into a temporary buffer */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* unroll the loop 2 times */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (chn = 0; chn < n_chn_out; chn++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- static const FLOAT fircoef[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ static const FLOAT fircoef[] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- -5.52212e-17 * 2, -0.313819 * 2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -5.52212e-17 * 2, -0.313819 * 2, 0.0, 0.0
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* apply high pass filter of fs/4 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const sample_t *const firbuf = &buffer[chn][576 - 350 - NSFIRLEN + 192];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- assert(dimension_of(fircoef) == ((NSFIRLEN - 1) / 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //assert(dimension_of(fircoef) == ((NSFIRLEN - 1) / 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0, firbuf+10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_lvsl(0, firbuf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_lvsl(0, firbuf+1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask3 = vec_lvsl(0, firbuf+2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask4 = vec_lvsl(0, firbuf+3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1inv = vec_perm(vmask1,vmask1,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2inv = vec_perm(vmask2,vmask2,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask3inv = vec_perm(vmask3,vmask3,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask4inv = vec_perm(vmask4,vmask4,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(i=0;i<576;) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,firbuf+i+10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(v1, v2, vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat(vsum1, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_and(vsum2, vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,firbuf+i+NSFIRLEN-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=0;j<(NSFIRLEN-1)/2;j+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16, firbuf+i+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0, firbuf+i+NSFIRLEN-3-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v5,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v4,vmask3inv);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,fircoef+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_madd(v10,v11,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_slo(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_slo(vsum,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_slo(vsum,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vsum,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_add(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsuma = vec_add(v15,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsuma = vec_and(vsuma,vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat(vsum1, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_and(vsum2, vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,firbuf+i+NSFIRLEN-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_lvsl(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=0;j<(NSFIRLEN-1)/2;j+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16, firbuf+i+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0, firbuf+i+NSFIRLEN-3-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v5,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v4,vmask4inv);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,fircoef+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_madd(v10,v11,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sro(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_slo(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_slo(vsum,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vsum,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_add(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumb = vec_add(v15,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumb = vec_and(vsumb,vmaskb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat(vsum1, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_and(vsum2, vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,firbuf+i+NSFIRLEN-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_lvsl(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=0;j<(NSFIRLEN-1)/2;j+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16, firbuf+i+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0, firbuf+i+NSFIRLEN-3-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v5,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v4,vmask1inv);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,fircoef+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_madd(v10,v11,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sro(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sro(vsum,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_slo(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vsum,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_add(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumc = vec_add(v15,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumc = vec_and(vsumc,vmaskc);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat(vsum1, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_and(vsum2, vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,firbuf+i+NSFIRLEN-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_lvsl(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=0;j<(NSFIRLEN-1)/2;j+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16, firbuf+i+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0, firbuf+i+NSFIRLEN-3-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v5,vmask4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v4,vmask2inv);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,fircoef+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_madd(v10,v11,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sro(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sro(vsum,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_sro(vsum,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vsum,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_add(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumd = vec_add(v15,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumd = vec_and(vsumd,vmaskd);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_or(vsuma,vsumb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_or(vsumc,vsumd);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_or(vsum1,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vsum,0,ns_hpfsmpl[chn]+i-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 0; i < 576; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT sum1, sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ sum1 = firbuf[i + 10];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -803,6 +1178,7 @@ vbrpsy_attack_detection(lame_internal_fl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ns_hpfsmpl[chn][i] = sum1 + sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ masking_ratio[gr_out][chn].en = psv->en[chn];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ masking_ratio[gr_out][chn].thm = psv->thm[chn];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (n_chn_psy > 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -841,9 +1217,28 @@ vbrpsy_attack_detection(lame_internal_fl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 0; i < 9; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const *const pfe = pf + 576 / 9;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT p = 1.;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT vmax[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector float)vec_splat_s32(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ctf((vector signed int)v1,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; pf < pfe; pf+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,pf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_abs(v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_max(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_slo(v2,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_slo(v2,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_slo(v2,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_max(v2,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_max(v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 =vec_max(v8,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,0,vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ p = vmax[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (; pf < pfe; pf++)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (p < fabs(*pf))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ p = fabs(*pf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ psv->last_en_subshort[chn][i] = en_subshort[i + 3] = p;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ en_short[1 + i / 3] += p;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (p > en_subshort[i + 3 - 2]) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1039,7 +1434,7 @@ vbrpsy_calc_mask_index_s(lame_internal_f
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-vbrpsy_compute_masking_s(lame_internal_flags * gfc, const FLOAT(*fftenergy_s)[HBLKSIZE_s],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++vbrpsy_compute_masking_s(lame_internal_flags * gfc, const FLOAT(*fftenergy_s)[HBLKSIZE_s+3],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT * eb, FLOAT * thr, int chn, int sblock)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *const psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1147,24 +1542,286 @@ vbrpsy_compute_masking_l(lame_internal_f
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *const psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT max[CBANDS], avg[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- unsigned char mask_idx_l[CBANDS + 2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT max[CBANDS] __attribute__ ((aligned (16))), avg[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned char mask_idx_l[CBANDS + 2] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int k, b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float tmp[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char v31 = (vector unsigned char)VINIT16ALL(31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vmask1 = (vector unsigned int)VINIT4ALL(0xff);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector signed int vone = (vector signed int)VINIT4ALL(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtab1 = (vector unsigned int)VINIT4(0x3f800000,0x3f4b5936,0x3f218698,0x3f218698);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtab2 = (vector unsigned int)VINIT4(0x3f218698,0x3f218698,0x3f218698,0x3e809bfa);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtab3 = (vector unsigned int)VINIT4(0x3df09e99,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtable1 = (vector unsigned int)VINIT4(0x3fe39e89,0x3fec53e5,0x3ff55ea7,0x3ff9149b);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtable2 = (vector unsigned int)VINIT4(0x3ffcd90e,0x3fea8f7b,0x3fd997da,0x3fbf84e2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtable3 = (vector unsigned int)VINIT4(0x3fa8917c,0x3f800000,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Calculate the energy and the tonality of each partition.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ calc_energy(gdl, fftenergy, eb_l, max, avg);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ calc_mask_index_l(gfc, max, avg, mask_idx_l);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char vmaskidx1 = vec_ld(0,mask_idx_l); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char vmaskidx2 = vec_ld(16,mask_idx_l);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char vmaskidx3 = vec_ld(32,mask_idx_l);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char vmaskidx4 = vec_ld(48,mask_idx_l);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = gfc->sv_qnt.masking_lower;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vmasking_lower_coeff = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmasking_lower_coeff = vec_splat(vmasking_lower_coeff,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * convolve the partitioned energy and unpredictability
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * with the spreading function, s3_l[b][k]
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ k = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- for (b = 0; b < gdl->npart; b++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (b = 0; b < gdl->npart-3; b+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v1,v2,v3,v4,v5,vkk,vkk2,vlast,vdd,vdd_n,vk,vk2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vf1,vf2,vf3,vf4,vecb,vx,veb,vavgmask,vmasking_lower;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmasking_lower = vec_ld(0,gdl->masking_lower+b);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmasking_lower = vec_madd(vmasking_lower,vmasking_lower_coeff,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int tmp2[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int tmp3[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,gdl->s3ind[b]); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,gdl->s3ind[b+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,gdl->s3ind[b+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,gdl->s3ind[b+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkk = vec_mergeh(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlast = vec_mergel(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sub(vlast,vkk);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sel(v1,(vector signed int)vzero,vec_cmpgt((vector signed int)vzero,v1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v1,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[0] = k;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[1] = k+tmp2[0]+1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[2] = k+tmp2[0]+tmp2[1]+2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[3] = k+tmp2[0]+tmp2[1]+tmp2[2]+3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ k = k+tmp2[0]+tmp2[1]+tmp2[2]+tmp2[3]+4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vk = vec_ld(0,tmp3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_perm(vmaskidx1,vmaskidx2,(vector unsigned char)vkk);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_perm(vmaskidx3,vmaskidx4,(vector unsigned char)vkk);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_sel(v1,v2,vec_cmpgt(vkk,(vector signed int)VINIT4ALL(31)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_and(vdd,(vector signed int)vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd_n = vone;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = gdl->s3[tmp3[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[1] = gdl->s3[tmp3[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[2] = gdl->s3[tmp3[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[3] = gdl->s3[tmp3[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vkk,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = eb_l[tmp2[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[1] = eb_l[tmp2[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[2] = eb_l[tmp2[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[3] = eb_l[tmp2[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ veb = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vecb = vec_madd(vf1,veb,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(vdd,vec_splat_u32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(v1,vec_splat_s32(1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_add(v1,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(-8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sl(v2,vec_splat_u32(-16));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sl(v3,vec_splat_u32(8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_or(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = (vector float)vec_perm(vtab1,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = (vector float)vec_perm(vtab3,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_sel(vf1,vf2,(vector unsigned int)vec_cmpgt((vector unsigned char)v1,v31));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vecb = vec_madd(vecb,vf2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkk = vec_add(vkk,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vk = vec_add(vk,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while(vec_any_le(vkk,vlast)) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkk2 = vec_sel(vkk,vlast,vec_cmpgt(vkk,vlast));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vk2 = vec_sel(vk,(vector signed int)vzero,vec_cmpgt(vkk,vlast));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_perm(vmaskidx1,vmaskidx2,(vector unsigned char)vkk2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_perm(vmaskidx3,vmaskidx4,(vector unsigned char)vkk2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sel(v1,v2,vec_cmpgt(vkk2,(vector signed int)VINIT4ALL(31)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_and(v1,(vector signed int)vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_cmpgt(vkk,vlast);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_nor(v2,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_and(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_and(vone,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_add(vdd,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd_n = vec_add(vdd_n,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vk2,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = gdl->s3[tmp2[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[1] = gdl->s3[tmp2[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[2] = gdl->s3[tmp2[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[3] = gdl->s3[tmp2[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vkk,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = eb_l[tmp2[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[1] = eb_l[tmp2[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[2] = eb_l[tmp2[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[3] = eb_l[tmp2[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ veb = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_madd(vf1,veb,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v5,vec_splat_u32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(v1,vec_splat_s32(1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_add(v1,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(-8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sl(v2,vec_splat_u32(-16));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sl(v3,vec_splat_u32(8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_or(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = (vector float)vec_perm(vtab1,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = (vector float)vec_perm(vtab3,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_sel(vf1,vf2,(vector unsigned int)vec_cmpgt((vector unsigned char)v1,v31));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_madd(vx,vf2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vratio,vout,vf5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_sel(vecb,vzero,vec_cmplt(vecb,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_sel(vx,vzero,vec_cmplt(vx,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf1,vf2,vec_cmpgt(vf2,vf1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf4 = vec_sel(vf2,vf1,vec_cmpgt(vf2,vf1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = vec_re(vf4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vratio = vec_madd(vf3,vec_madd(vec_nmsub(vf4,vf5,(vector float)VINIT4ALL(1.0)),vf5,vf5),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2[0] = b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2[1] = b+1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2[2] = b+2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2[3] = b+3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[0] = mask_add_delta(mask_idx_l[b]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[1] = mask_add_delta(mask_idx_l[b+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[2] = mask_add_delta(mask_idx_l[b+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[3] = mask_add_delta(mask_idx_l[b+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sub(vkk2,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,tmp3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_abs(v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = (vector signed int)vec_cmpgt(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector signed int)vec_cmpge(vratio,vmamax1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf4 = vec_add(vf1,vf2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(vec_any_eq(vec_or(v5,v3),(vector signed int)vzero)) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = fast_log10_altivec_2(vratio);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_cts(vf3,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(v1,vec_splat_s32(1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_add(v1,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(-8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sl(v2,vec_splat_u32(-16));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sl(v3,vec_splat_u32(8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_or(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = (vector float)vec_perm(vtable1,vtable2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = (vector float)vec_perm(vtable3,vtable2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = vec_sel(vf3,vf5,(vector unsigned int)vec_cmpgt((vector unsigned char)v1,v31));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = vec_madd(vf4,vf5,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = vec_sel(vf5,vf4,vec_cmpge(vratio,vmamax1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ else vf5 = vf4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vf1,vf2,vec_cmpgt(vf2,vf1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vout,vf4,vec_cmpgt(vmamax2,vratio));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vf5,vout,(vector unsigned int)v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vout,vecb,(vector unsigned int)vec_cmple(vx,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vout,vx,(vector unsigned int)vec_cmple(vecb,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vecb = vec_sel(vout,vecb,vec_cmpgt(vkk,vlast));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkk = vec_add(vkk,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vk = vec_add(vk,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_sl(vdd,(vector unsigned int)vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd_n = vec_sl(vdd_n,(vector unsigned int)vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_add(vdd,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ctf(vdd,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_ctf(vdd_n,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_re(vf2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_madd(vf1,vf2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_cts(vf1,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(vdd,vec_splat_u32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(v1,vec_splat_s32(1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_add(v1,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(-8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sl(v2,vec_splat_u32(-16));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sl(v3,vec_splat_u32(8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_or(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = (vector float)vec_perm(vtab1,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = (vector float)vec_perm(vtab3,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_sel(vf1,vf2,(vector unsigned int)vec_cmpgt((vector unsigned char)v1,v31));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_ctf(vone,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vavgmask = vec_madd(vf1,vf2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vecb = vec_madd(vecb,vavgmask,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf4 = vec_ld(0,eb_l+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (psv->blocktype_old[chn & 0x01] == SHORT_TYPE) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,psv->nb_l1[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_madd(vf1,(vector float)VINIT4ALL(rpelev),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_madd(vf4,(vector float)VINIT4ALL(NS_PREECHO_ATT2),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf2,vf3,vec_cmpgt(vf3,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_min(vecb,vf3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //vec_st(vf3,0,thr+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,psv->nb_l1[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_ld(0,psv->nb_l2[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_madd(vf1,(vector float)VINIT4ALL(rpelev),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_madd(vf2,(vector float)VINIT4ALL(rpelev2),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vzero,vf3,vec_cmpgt(vf3,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_sel(vzero,vf2,vec_cmpgt(vf2,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (psv->blocktype_old[chn & 0x01] == NORM_TYPE) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_min(vf3,vf2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_min(vecb,vf3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //vec_st(vf3,0,thr+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vf1,0,psv->nb_l2[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vecb,0,psv->nb_l1[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_ld(0,max+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,gdl->minval+b);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_madd(vx,vf1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_madd(vx,vavgmask,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf3,vx,vec_cmpgt(vf3,vx));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //vec_st(vf3,0,thr+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_cmpgt(vmasking_lower,(vector float)VINIT4ALL(1.0f));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_madd(vf3,vmasking_lower,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf3,vf1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf3,vf4,vec_cmpgt(vf3,vf4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_madd(vf3,vmasking_lower,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf1,vf3,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vf3,0,thr+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b=0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; b < gdl->npart; b++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT x, ecb, avg_mask, t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const masking_lower = gdl->masking_lower[b] * gfc->sv_qnt.masking_lower;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //fprintf(stderr,"%f\n",masking_lower);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* convolve the partitioned energy with the spreading function */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int kk = gdl->s3ind[b][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int const last = gdl->s3ind[b][1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1423,11 +2080,11 @@ L3psycho_anal_vbr(lame_internal_flags *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* fft and energy calculation */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT(*wsamp_l)[BLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT(*wsamp_s)[3][BLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT fftenergy[HBLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT fftenergy_s[3][HBLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT wsamp_L[2][BLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT wsamp_S[2][3][BLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT eb[4][CBANDS], thr[4][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT fftenergy[HBLKSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT fftenergy_s[3][HBLKSIZE_s+3] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT wsamp_L[2][BLKSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT wsamp_S[2][3][BLKSIZE_s] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT eb[4][CBANDS] __attribute__ ((aligned (16))), thr[4][CBANDS] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT sub_short_factor[4][3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT thmm;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1436,7 +2093,7 @@ L3psycho_anal_vbr(lame_internal_flags *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ (cfg->msfix > 0.f) ? (cfg->ATH_offset_factor * gfc->ATH->adjust_factor) : 1.f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const FLOAT(*const_eb)[CBANDS] = (const FLOAT(*)[CBANDS]) eb;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- const FLOAT(*const_fftenergy_s)[HBLKSIZE_s] = (const FLOAT(*)[HBLKSIZE_s]) fftenergy_s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const FLOAT(*const_fftenergy_s)[HBLKSIZE_s+3] = (const FLOAT(*)[HBLKSIZE_s+3]) fftenergy_s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* block type */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int ns_attacks[4][4] = { {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1824,7 +2481,7 @@ compute_bark_values(PsyConst_CB2SB_t con
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-init_s3_values(FLOAT ** p, int (*s3ind)[2], int npart,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++init_s3_values(FLOAT ** p, int (*s3ind)[4], int npart,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const *bval, FLOAT const *bval_width, FLOAT const *norm)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT s3[CBANDS][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/quantize.c.orig 2017-08-15 22:40:45.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/quantize.c 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -28,6 +28,12 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "encoder.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -42,7 +48,26 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef PPC_FRSQRTE
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline double __frsqrte(double number) /* Wraps the PowerPC frsqrte instruction (reciprocal square-root estimate): returns the instruction's result for `number`. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ double y; /* receives the instruction's output operand */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ asm("frsqrte %0,%1" : "=f" (y) : "f" (number)); /* %0 = y (write-only, "f" constraint), %1 = number (input, "f" constraint) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return y; /* the raw estimate, with no refinement applied here */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline double ppc_sqrt(double x) { /* Approximates sqrt(x) as x * (1/sqrt(x)), starting from __frsqrte(x) and refining it; returns 0 when x == 0.0. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ double y; /* running estimate of 1/sqrt(x); holds the square-root approximation after the final multiply */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const double halfx = 0.5 * x; /* hoisted: every refinement step below reuses 0.5*x */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y = __frsqrte(x); /* initial estimate of 1/sqrt(x) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y *= 1.5 - halfx * y * y; /* refinement step 1: y = y * (1.5 - 0.5*x*y*y), the Newton-Raphson update for 1/sqrt(x) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y *= 1.5 - halfx * y * y; /* refinement step 2 (same update) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y *= 1.5 - halfx * y * y; /* refinement step 3 (same update); a fourth step is left disabled on the next line */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //y *= 1.5 - halfx * y * y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y *= x; /* x * (1/sqrt(x)) gives the square-root approximation */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return (x == 0.0) ? 0 : y; /* x == 0.0 is special-cased to 0; NOTE(review): presumably because the estimate for 0 is not finite, so y would not be usable - confirm */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* convert from L/R <-> Mid/Side */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -72,9 +97,162 @@ ms_convert(III_side_info_t * l3_side, in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ init_xrpow_core_c(gr_info * const cod_info, FLOAT xrpow[576], int upper, FLOAT * sum)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vsum,vsum2,vsum3,vsum4,vmax,vmax2,vmax3,vmax4,vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vc1,vc2,vc3,vc4,vc5,vperm;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vconst1 = (vector float)VINIT4ALL(0.25);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vconst2 = (vector float)VINIT4ALL(1.25);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *sum = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_sl(vc1,vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_sl(vc3,vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_or(vc3,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_xor(vsum,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_xor(vmax,vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_xor(vsum2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax2 = vec_xor(vmax2,vmax2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_xor(vsum3,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax3 = vec_xor(vmax3,vmax3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum4 = vec_xor(vsum4,vsum4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax4 = vec_xor(vmax4,vmax4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,(cod_info->xr));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm = vec_lvsl(0,(cod_info->xr));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 0; i <= upper-15; i+=16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,(cod_info->xr)+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,(cod_info->xr)+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,(cod_info->xr)+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,(cod_info->xr)+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v0,v1,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v1,v2,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v2,v3,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v3,v4,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_abs(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_abs(v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_abs(v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_abs(v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_add(vsum,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_add(vsum2,v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_add(vsum3,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum4 = vec_add(vsum4,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_re(vec_rsqrte(vec_rsqrte(v9)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_re(vec_rsqrte(vec_rsqrte(v10)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_re(vec_rsqrte(vec_rsqrte(v11)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_re(vec_rsqrte(vec_rsqrte(v12)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = (vector float)vec_cmpeq(vzero,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = (vector float)vec_cmpeq(vzero,v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = (vector float)vec_cmpeq(vzero,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = (vector float)vec_cmpeq(vzero,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_madd(v1,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_madd(v2,v2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_madd(v3,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_madd(v4,v4,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_madd(v13,v13,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_madd(v14,v14,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_madd(v15,v15,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_madd(v16,v16,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v17 = vec_madd(v9,vconst1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v18 = vec_madd(v10,vconst1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v19 = vec_madd(v11,vconst1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v20 = vec_madd(v12,vconst1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_nmsub(v13,v17,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_nmsub(v14,v18,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_nmsub(v15,v19,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_nmsub(v16,v20,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(v13,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(v14,v2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(v15,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(v16,v4,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sel(v1,vzero,(vector unsigned int)v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sel(v2,vzero,(vector unsigned int)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sel(v3,vzero,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_sel(v4,vzero,(vector unsigned int)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v17 = vec_madd(v1,v9,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v18 = vec_madd(v2,v10,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v19 = vec_madd(v3,v11,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v20 = vec_madd(v4,v12,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v17,0,xrpow+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v18,16,xrpow+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v19,32,xrpow+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v20,48,xrpow+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(v17,vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax2 = vec_max(v18,vmax2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax3 = vec_max(v19,vmax3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax4 = vec_max(v20,vmax4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(vmax,vmax2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax3 = vec_max(vmax3,vmax4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(vmax,vmax3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_add(vsum,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_add(vsum3,vsum4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_add(vsum,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_slo(vmax,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_slo(vsum,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_max(v1,vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_slo(v3,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_slo(v4,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(v3,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_add(v4,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_perm(vmax,vmax,vec_lvsr(0,&(cod_info->xrpow_max)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_perm(vsum,vsum,vec_lvsr(0,sum));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vmax,0,&(cod_info->xrpow_max));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum,0,sum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; i <= upper; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp = fabs(cod_info->xr[i]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i] = sqrt(tmp * sqrt(tmp));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef PPC_FRSQRTE
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT tmp2,tmp3,tmp4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 0; i <= upper-3; i+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp = fabs (cod_info->xr[i]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2 = fabs (cod_info->xr[i+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3 = fabs (cod_info->xr[i+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp4 = fabs (cod_info->xr[i+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i] = ppc_sqrt (tmp * ppc_sqrt(tmp));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i+1] = ppc_sqrt (tmp2 * ppc_sqrt(tmp2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i+2] = ppc_sqrt (tmp3 * ppc_sqrt(tmp3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i+3] = ppc_sqrt (tmp4 * ppc_sqrt(tmp4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i+1] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i+1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i+2] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i+2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i+3] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i+3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; i <= upper; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp = fabs(cod_info->xr[i]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i] = ppc_sqrt(tmp * ppc_sqrt(tmp));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 0; i <= upper; ++i) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ tmp = fabs(cod_info->xr[i]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *sum += tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -83,6 +261,8 @@ init_xrpow_core_c(gr_info * const cod_in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (xrpow[i] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ cod_info->xrpow_max = xrpow[i];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1495,7 +1675,7 @@ VBR_old_iteration_loop(lame_internal_fla
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[2][2][SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int bands[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int frameBits[15];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int used_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1650,7 +1830,7 @@ VBR_new_iteration_loop(lame_internal_fla
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[2][2][SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[2][2][576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[2][2][576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int frameBits[15];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int used_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int max_bits[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1904,7 +2084,7 @@ ABR_iteration_loop(lame_internal_flags *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int targ_bits[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mean_bits, max_frame_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int ch, gr, ath_over;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1991,7 +2171,7 @@ CBR_iteration_loop(lame_internal_flags *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int targ_bits[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mean_bits, max_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int gr, ch;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/quantize_pvt.c.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/quantize_pvt.c 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -27,6 +27,13 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -751,6 +758,39 @@ calc_xmin(lame_internal_flags const *gfc
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static FLOAT
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ calc_noise_core_c(const gr_info * const cod_info, int *startline, int l, FLOAT step)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,va,vb,vstep,vzero,vnoise1,vnoise2,vix01;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm1,vperm2,vperm5,vperm6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vx7,vshamt,vone;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vmask1,vmask2,vmask3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v10,v11,v12,v13,v14,v15,v16,v17;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vc1,vc2,vc3,vc4,vc5,vc6,vperm3,vperm4,vmask;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float temp[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp[0] = step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vstep = vec_ld(0,temp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm6 = (vector unsigned char)VINIT16(0,0,3,19,0,0,7,23,0,0,11,27,0,0,15,31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = vec_sld(vperm6,vperm6,2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_splat_u32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_sld((vector unsigned int)vzero,vmask1,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask3 = vec_sld((vector unsigned int)vzero,vmask1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_sld((vector unsigned int)vzero,vmask1,12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm3 = (vector unsigned char)VINIT16(0,0,0,0,0,0,0,0,0,1,2,3,16,17,18,19);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_sld(vperm3,(vector unsigned char)vzero,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = (vector unsigned char)VINIT16ALL(16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vstep = vec_splat(vstep,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_xor(vnoise1,vnoise1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise2 = vec_xor(vnoise2,vnoise2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vone = vec_splat_s32(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_splat_s32(2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT noise = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int j = *startline;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const int *const ix = cod_info->l3_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -767,9 +807,55 @@ calc_noise_core_c(const gr_info * const
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else if (j > cod_info->big_values) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT ix01[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT ix01[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix01[0] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix01[1] = step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vix01 = vec_ld(0,ix01);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = vec_lvsl(0,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_ld(0,ix+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = vec_lvsl(0,ix+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>1;l-=2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_ld(16,ix+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vx1,vx2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_abs(v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vx2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_sl(vx3,(vector unsigned int)vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = vec_add(vx4,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = vec_add(vx4,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx7 = vec_add(vx5,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_perm(vx4,vx5,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vx6,vx7,vperm6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_or(vx2,vx3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(vix01,vix01,(vector unsigned char)vx4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_sub(va,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_madd(va,va,vnoise1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sld(vnoise1,vnoise1,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(vnoise1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sld(v2,v2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v4,v4,vec_lvsr(0,&noise));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v5,0,&noise);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(l) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - ix01[ix[j]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - ix01[ix[j]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (l--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ temp = fabs(cod_info->xr[j]) - ix01[ix[j]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -779,8 +865,138 @@ calc_noise_core_c(const gr_info * const
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = vec_lvsl(0,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>3;l-=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v2,v3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_abs(v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_abs(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v2,v2,vec_lvsl(0,pow43+ix[j]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v6,vec_lvsl(-4,pow43+ix[j+1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v10,v10,vec_lvsl(-8,pow43+ix[j+2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v14,v14,vec_lvsl(-12,pow43+ix[j+3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_sel(v4,v8,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_sel(v4,v12,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_sel(v4,v16,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_nmsub(v4,vstep,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_lvsl(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_lvsl(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_lvsl(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_lvsl(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_or(vc2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_or(vc4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_perm(vc1,vc2,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc6 = vec_perm(vc3,vc4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v3,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v4,v5,vc6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sld(v6,v7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_nmsub(v8,vstep,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j+=4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v3,v3,vec_lvsl(0,pow43+ix[j]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v7,v7,vec_lvsl(-4,pow43+ix[j+1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v11,v11,vec_lvsl(-8,pow43+ix[j+2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v17 = vec_perm(v15,v15,vec_lvsl(-12,pow43+ix[j+3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_sel(v5,v9,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_sel(v5,v13,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_sel(v5,v17,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_nmsub(v5,vstep,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_lvsl(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_lvsl(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_lvsl(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_lvsl(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_or(vc2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_or(vc4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_perm(vc1,vc2,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc6 = vec_perm(vc3,vc4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v3,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v4,v5,vc6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sld(v6,v7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_nmsub(v8,vstep,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_madd(va,va,vnoise1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise2 = vec_madd(vb,vb,vnoise2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j+=4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_add(vnoise1,vnoise2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>1;l-=2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_abs(v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v2,vec_lvsl(0,pow43+ix[j]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v3,vec_lvsl(-4,pow43+ix[j+1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v4,v4,vec_lvsl(-8,pow43+ix[j+2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v5,v5,vec_lvsl(-12,pow43+ix[j+3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sel(v6,v7,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sel(v6,v8,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sel(v6,v9,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_nmsub(v6,vstep,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_madd(va,va,vnoise1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sld(vnoise1,vnoise1,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(vnoise1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sld(v2,v2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v4,v4,vec_lvsr(0,&noise));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v5,0,&noise);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(l) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (l--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -790,6 +1006,7 @@ calc_noise_core_c(const gr_info * const
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *startline = j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/tables.c.orig 2011-05-08 01:05:17.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/tables.c 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -406,7 +406,7 @@ const uint8_t t33l[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-const struct huffcodetab ht[HTN] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++const struct huffcodetab ht[HTN] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* xlen, linmax, table, hlen */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0, 0, NULL, NULL},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {2, 0, t1HB, t1l},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/takehiro.c.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/takehiro.c 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -26,6 +26,12 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -222,6 +228,150 @@ quantize_lines_xrpow(unsigned int l, FLO
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT * xr, int *ix)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,va,vb,vistep,vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vprev;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm1,vperm2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float const1 = (vector float)VINIT4(0.4053964553387788,3.404263724373839,5.465086767819913,1.0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float const2 = (vector float)VINIT4(7.719205369637751,10.93017829043677,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int temp[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float temp2[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp2[0] = istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vistep = vec_ld(0,temp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vistep = vec_splat(vistep,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ l = l >> 1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = vec_lvsl(0,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = vec_lvsr(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_ld(-16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vec_perm(vx1,vx2,vec_lvsl(0,ix));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>3;l-=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v2,v3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_madd(v4,vistep,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_madd(v5,vistep,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_floor(va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_floor(vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_splat(const1,2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_splat(const1,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_splat(const2,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_splat(const2,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v2,v4,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v3,v4,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v2,v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v3,v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_splat(const1,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_splat(const1,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v8,v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v9,v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v10,v2,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v11,v3,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_re(v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_nmsub(v10,v6,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_nmsub(v11,v7,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v10,v6,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v11,v7,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_madd(v8,v10,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_madd(v9,v11,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_cts(va,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_cts(vb,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vprev,vx1,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_perm(vx1,vx2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx3,0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx4,16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vx2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vx1,vx2,vec_lvsl(0,ix));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_perm(vprev,vx3,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx4,0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>1;l-=2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT x0, x1, x2, x3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int rx0, rx1, rx2, rx3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x0 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x0, rx0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x1, rx1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x3 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x2, rx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x0 += QUANTFAC(rx0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x3, rx3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1 += QUANTFAC(rx1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x0, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2 += QUANTFAC(rx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x1, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x3 += QUANTFAC(rx3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x2, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x3, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>1;l-=2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_madd(v4,vistep,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_cts(va,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st((vector unsigned int)vx1,0,temp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,adj43+temp[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,adj43+temp[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,adj43+temp[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,adj43+temp[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v2,vec_lvsl(0,adj43+temp[0]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v3,vec_lvsl(-4,adj43+temp[1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v4,v4,vec_lvsl(-8,adj43+temp[2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v5,v5,vec_lvsl(-12,adj43+temp[3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_add(va,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_cts(va,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vprev,vx1,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx3,0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vx1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vx1,vx2,vec_lvsl(0,ix));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_perm(vprev,vx3,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx4,0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (l) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT x0, x1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int rx0, rx1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x0 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x0, rx0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x1, rx1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x0 += QUANTFAC(rx0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1 += QUANTFAC(rx1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x0, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x1, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int remaining;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ assert(l > 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -263,7 +413,7 @@ quantize_lines_xrpow(unsigned int l, FLO
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ XRPOW_FTOI(x0, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ XRPOW_FTOI(x1, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -420,6 +570,60 @@ quantize_xrpow(const FLOAT * xp, int *pi
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* ix_max */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*************************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ix_max_vec(const int *ix, const int *end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int vresult[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int max1=0, max2=0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v1, v2, v3, v4, v5, v6, v7, vmax;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vc1,vc2,vc3,vc4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(end - ix < 8) goto normal;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int i = (end-ix)/4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int remain = (end-ix)%4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_sl(vc1,vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_sl(vc3,vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0, ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0, ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_xor(vmax, vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while(i--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16, ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v1, v2, vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(vmax,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_slo(vmax,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_max(vmax,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_slo(v5,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_max(v5,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v7,0,vresult);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ max1 = vresult[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(!remain) return max1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //max2 = vresult[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*if(vresult[2] > max1) max1 = vresult[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(vresult[3] > max2) max2 = vresult[3];*/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ normal:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x2 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (max1 < x1) max1 = x1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (max2 < x2) max2 = x2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(max1 < max2) max1 = max2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return max1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix_max(const int *ix, const int *end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -438,14 +642,14 @@ ix_max(const int *ix, const int *end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ max1 = max2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return max1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(ALTIVEC) || (defined(ALTIVEC) && !defined(ALTIVEC_970))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ count_bit_ESC(const int *ix, const int *const end, int t1, const int t2, unsigned int *const s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -481,6 +685,7 @@ count_bit_ESC(const int *ix, const int *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -507,6 +712,7 @@ static const int huf_tbl_noESC[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(ALTIVEC)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ count_bit_noESC_from2(const int *ix, const int *end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -533,6 +739,7 @@ count_bit_noESC_from2(const int *ix, con
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -572,6 +779,651 @@ count_bit_noESC_from3(const int *ix, con
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_ESC_altivec(const int *ix, const int *const end, int t1, const int t2, int *const s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* ESC-table is used */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int const linbits = ht[t1].xlen * 65536 + ht[t2].xlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int sum = 0, sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vsum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vshamt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vzero,vs1,vs2,vs3,vs4,vs5,vs6,vlimit1,vlimit2,vone;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned char tmp[16] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int tmp2[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlimit1 = vec_splat_u8(14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlimit2 = vec_splat_u8(15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vone = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_splat_u8(4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_xor(vsum,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_packs(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_packs(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector signed int)vec_packs(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector signed int)vec_packs(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = vec_packs((vector unsigned short)v1,(vector unsigned short)v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = vec_packs((vector unsigned short)v2,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = vec_sel(vs1,vlimit2,vec_cmpgt(vs1,vlimit1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs4 = vec_sel(vs2,vlimit2,vec_cmpgt(vs2,vlimit1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs5 = vec_sel(vzero,vone,vec_cmpgt(vs1,vlimit1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs6 = vec_sel(vzero,vone,vec_cmpgt(vs2,vlimit1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs5 = vec_add(vs5,vs6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_sum4s(vs5,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = vec_sl(vs3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = vec_add(vs3,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vs3,0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[4]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[5]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[6]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[7]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[8]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[9]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[10]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[11]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[12]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[13]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[14]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[15]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = (vector unsigned int)vec_sums((vector signed int)vsum,(vector signed int)vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vsum,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += tmp2[3] * linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int y = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (x >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (y >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x <<= 4u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x += y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++normal:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int y = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (x >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (y >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x <<= 4u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x += y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++end:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 = sum & 0xffffu;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum >>= 16u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum > sum2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t1 = t2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from2_altivec1(const int *ix, const int *end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int sum1, sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int *table = table23;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vxlen,vzero,vsum1,vsum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vx;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vhlen1,vhlen2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed char vs1,vs2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen1 = (vector unsigned char)VINIT16(1,4,7,4,5,7,6,7,8,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen2 = (vector unsigned char)VINIT16(2,3,7,4,4,7,6,7,8,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxlen = vec_splat_s32(3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_xor(vsum1,vsum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_xor(vsum2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = (vector signed int)vec_mladd((vector unsigned short)v1,(vector unsigned short)vxlen,(vector unsigned short)v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = (vector signed int)vec_mladd((vector unsigned short)v3,(vector unsigned short)vxlen,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = (vector signed int)vec_pack(vx1,vx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = (vector signed int)vec_mladd((vector unsigned short)v5,(vector unsigned short)vxlen,(vector unsigned short)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = (vector signed int)vec_mladd((vector unsigned short)v7,(vector unsigned short)vxlen,(vector unsigned short)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = (vector signed int)vec_pack(vx4,vx5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = (vector unsigned char)vec_pack((vector unsigned short)vx3,(vector unsigned short)vx6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = (vector signed char)vec_perm(vhlen1,vhlen1,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = (vector signed char)vec_perm(vhlen2,vhlen2,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sum4s(vs1,vsum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sum4s(vs2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sums(vsum1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sums(vsum2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(vsum1,vsum1,vec_lvsr(4,&sum1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_perm(vsum2,vsum2,vec_lvsr(4,&sum2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum1,0,&sum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum2,0,&sum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += table[ x0 * xlen + x1 ];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += sum & 0xffffu;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = (sum>>16u) + sum1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++normal:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += table[ x0 * xlen + x1 ];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 = sum & 0xffffu;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum >>= 16u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++end:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum > sum2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t1++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from2_altivec2(const int *ix, const int *end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1]; /* base index; the function returns t1 or t1 + 1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words: totals two alternatives over the pairs in [ix, end), adds the smaller total to *s and returns its index. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int sum1, sum2; /* sum1 is written only on the vector path; sum2 is written on both paths */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 4; /* row stride of the pair index x0 * xlen + x1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int *table = table56; /* each entry carries both totals: high 16 bits and low 16 bits (see the split below) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vxlen,vzero,vsum1,vsum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vx;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vhlen1,vhlen2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed char vs1,vs2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* Byte lookup tables for the vector path, 16 entries each, indexed by x0 * 4 + x1. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen1 = (vector unsigned char)VINIT16(1,4,7,8,4,5,8,9,7,8,9,10,8,8,9,10); /* feeds sum1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen2 = (vector unsigned char)VINIT16(3,4,6,8,4,4,6,7,5,6,7,8,7,7,8,9); /* feeds sum2 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); /* selects words 0 and 2 of each input vector: the x0 of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); /* selects words 1 and 3 of each input vector: the x1 of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxlen = vec_splat_s32(4); /* xlen replicated in every 32-bit lane */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero); /* x ^ x yields zero; NOTE(review): the operand is read before it is ever assigned */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_xor(vsum1,vsum1); /* clear accumulator (same idiom) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_xor(vsum2,vsum2); /* clear accumulator (same idiom) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal; /* fewer than 32 ints: scalar loop only */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix); /* aligned quadword containing ix[0] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix); /* permute control that realigns data starting at ix */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix); /* ninth quadword, needed to realign the eighth; NOTE(review): byte offset 128 is just past the 32 ints consumed here - confirm the buffer is readable there */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask); /* v9..v16: the 32 ints starting at ix, realigned */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8; /* reused as the first quadword of the next iteration */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1); /* de-interleave: v1/v3/v5/v7 hold x0 values, v2/v4/v6/v8 hold x1 values */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* Index computation: x0 * xlen + x1 in 16-bit lanes, then packed down to one byte per pair. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = (vector signed int)vec_mladd((vector unsigned short)v1,(vector unsigned short)vxlen,(vector unsigned short)v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = (vector signed int)vec_mladd((vector unsigned short)v3,(vector unsigned short)vxlen,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = (vector signed int)vec_pack(vx1,vx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = (vector signed int)vec_mladd((vector unsigned short)v5,(vector unsigned short)vxlen,(vector unsigned short)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = (vector signed int)vec_mladd((vector unsigned short)v7,(vector unsigned short)vxlen,(vector unsigned short)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = (vector signed int)vec_pack(vx4,vx5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = (vector unsigned char)vec_pack((vector unsigned short)vx3,(vector unsigned short)vx6); /* 16 byte indices, one per pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = (vector signed char)vec_perm(vhlen1,vhlen1,vx); /* byte-wise table lookup */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = (vector signed char)vec_perm(vhlen2,vhlen2,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sum4s(vs1,vsum1); /* add each group of four bytes into the 32-bit lane accumulators */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sum4s(vs2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31); /* repeat while at least 32 ints remain */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sums(vsum1,vzero); /* horizontal add of the four lanes; the total ends up in the last lane */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sums(vsum2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(vsum1,vsum1,vec_lvsr(4,&sum1)); /* rotate the total into the lane that vec_ste writes for this address */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_perm(vsum2,vsum2,vec_lvsr(4,&sum2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum1,0,&sum1); /* store the single element to the scalar */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum2,0,&sum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) { /* scalar tail for the leftover ints */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += table[ x0 * xlen + x1 ];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += sum & 0xffffu; /* low halves of the tail join the vhlen2 total */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = (sum>>16u) + sum1; /* high halves of the tail join the vhlen1 total */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++normal: /* scalar-only path */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += table[ x0 * xlen + x1 ];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 = sum & 0xffffu; /* low 16 bits: total for the alternative t1 + 1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum >>= 16u; /* high 16 bits: total for t1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++end:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum > sum2) { /* the alternative is strictly smaller: take it */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t1++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from3_altivec1(const int *ix, const int *const end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1]; /* base index; the function returns t1, t1 + 1 or t1 + 2 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words: totals three alternatives over the pairs in [ix, end), adds the smallest total to *s and returns its index. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum1 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum3 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 6; /* row stride of the pair index ix[0] * xlen + ix[1] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen1 = ht[7].hlen; /* scalar-path tables, one per alternative */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen2 = ht[8].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen3 = ht[9].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vxlen,vzero,vsum1,vsum2,vsum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vx,v31;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vhlen11,vhlen12,vhlen13,vhlen21,vhlen22,vhlen23,vhlen31,vhlen32,vhlen33;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed char vs1,vs2,vs3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* Vector-path byte tables: vhlenN1 and vhlenN2 cover indices 0..31, vhlenN3 covers 32 and up (presumably copies of the ht[7..9].hlen entries - verify). */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen11 = (vector unsigned char)VINIT16(1,4,7,9,9,10,4,6,8,9,9,10,7,7,9,10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen12 = (vector unsigned char)VINIT16(10,11,8,9,10,11,11,11,8,9,10,11,11,12,9,10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen13 = (vector unsigned char)VINIT16(11,12,12,12,0,0,0,0,0,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen21 = (vector unsigned char)VINIT16(2,4,7,9,9,10,4,4,6,10,10,10,7,6,8,10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen22 = (vector unsigned char)VINIT16(10,11,9,10,10,11,11,12,9,9,10,11,12,12,10,10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen23 = (vector unsigned char)VINIT16(11,11,13,13,0,0,0,0,0,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen31 = (vector unsigned char)VINIT16(3,4,6,7,9,10,4,5,6,7,8,10,5,6,7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen32 = (vector unsigned char)VINIT16(9,10,7,7,8,9,9,10,8,8,9,9,10,11,9,9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen33 = (vector unsigned char)VINIT16(10,10,11,11,0,0,0,0,0,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); /* selects words 0 and 2 of each input vector: the first int of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); /* selects words 1 and 3 of each input vector: the second int of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v31 = (vector unsigned char)VINIT16ALL(31); /* threshold for choosing the third table vector */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxlen = vec_splat_s32(6); /* xlen replicated in every 32-bit lane */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero); /* x ^ x yields zero; NOTE(review): the operand is read before it is ever assigned */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_xor(vsum1,vsum1); /* clear accumulator (same idiom) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_xor(vsum2,vsum2); /* clear accumulator (same idiom) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_xor(vsum3,vsum3); /* clear accumulator (same idiom) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal; /* fewer than 32 ints: scalar loop only */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ // Disabled alternative end pointer, kept from the original: int *end2 = ix + 32*((int)(end - ix)/32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix); /* aligned quadword containing ix[0] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix); /* permute control that realigns data starting at ix */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix); /* ninth quadword, needed to realign the eighth; NOTE(review): byte offset 128 is just past the 32 ints consumed here - confirm the buffer is readable there */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask); /* v9..v16: the 32 ints starting at ix, realigned */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8; /* reused as the first quadword of the next iteration */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1); /* de-interleave: v1/v3/v5/v7 hold first ints, v2/v4/v6/v8 hold second ints */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = (vector signed int)vec_mladd((vector unsigned short)v1,(vector unsigned short)vxlen,(vector unsigned short)v2); /* first * xlen + second, in 16-bit lanes */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = (vector signed int)vec_mladd((vector unsigned short)v3,(vector unsigned short)vxlen,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = (vector signed int)vec_pack(vx1,vx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = (vector signed int)vec_mladd((vector unsigned short)v5,(vector unsigned short)vxlen,(vector unsigned short)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = (vector signed int)vec_mladd((vector unsigned short)v7,(vector unsigned short)vxlen,(vector unsigned short)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = (vector signed int)vec_pack(vx4,vx5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = (vector unsigned char)vec_pack((vector unsigned short)vx3,(vector unsigned short)vx6); /* 16 byte indices, one per pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* Table lookup: vec_perm indexes 32 bytes at a time, so results for indices above 31 are taken from the third vector instead. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_perm(vhlen11,vhlen12,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_perm(vhlen13,vhlen13,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector signed int)vec_perm(vhlen21,vhlen22,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector signed int)vec_perm(vhlen23,vhlen23,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = (vector signed int)vec_perm(vhlen31,vhlen32,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = (vector signed int)vec_perm(vhlen33,vhlen33,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = (vector signed int)vec_cmpgt(vx,v31); /* all-ones bytes where the index exceeds 31 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = (vector signed char)vec_sel(v1,v2,(vector unsigned int)v7); /* take the third-vector result where the mask is set */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = (vector signed char)vec_sel(v3,v4,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = (vector signed char)vec_sel(v5,v6,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sum4s(vs1,vsum1); /* add each group of four bytes into the 32-bit lane accumulators */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sum4s(vs2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_sum4s(vs3,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31); /* repeat while at least 32 ints remain */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sums(vsum1,vzero); /* horizontal add of the four lanes; the total ends up in the last lane */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sums(vsum2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_sums(vsum3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(vsum1,vsum1,vec_lvsr(4,&sum1)); /* rotate the total into the lane that vec_ste writes for this address */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_perm(vsum2,vsum2,vec_lvsr(4,&sum2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_perm(vsum3,vsum3,vec_lvsr(4,&sum3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum1,0,&sum1); /* NOTE(review): vsum1 is vector signed int while sum1 is unsigned int - confirm this vec_ste overload is accepted */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum2,0,&sum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum3,0,&sum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) { /* scalar tail: leftover pairs are added onto the stored vector totals */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x = ix[0] * xlen + ix[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ normal: /* scalar-only path */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x = ix[0] * xlen + ix[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ end: /* both paths meet here with sum1, sum2 and sum3 complete */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum2) { /* second alternative is strictly smaller */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum3) { /* third alternative beats the best so far */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1+2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from3_altivec2(const int *ix, const int *const end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum1 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum3 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen1 = ht[10].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen2 = ht[11].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen3 = ht[12].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vxlen,vzero,vsum1,vsum2,vsum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vx,v31;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vhlen11,vhlen12,vhlen13,vhlen14,vhlen21,vhlen22,vhlen23,vhlen24,vhlen31,vhlen32,vhlen33,vhlen34;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed char vs1,vs2,vs3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen11 = (vector unsigned char)VINIT16( 1, 4, 7, 9, 10, 10, 10, 11, 4, 6, 8, 9, 10, 11, 10, 10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen12 = (vector unsigned char)VINIT16( 7, 8, 9, 10, 11, 12, 11, 11, 8, 9, 10, 11, 12, 12, 11, 12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen13 = (vector unsigned char)VINIT16( 9, 10, 11, 12, 12, 12, 12, 12,10, 11, 12, 12, 13, 13, 12, 13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen14 = (vector unsigned char)VINIT16( 9, 10, 11, 12, 12, 12, 13, 13,10, 10, 11, 12, 12, 13, 13, 13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen21 = (vector unsigned char)VINIT16( 2, 4, 6, 8, 9, 10, 9, 10, 4, 5, 6, 8, 10, 10, 9, 10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen22 = (vector unsigned char)VINIT16( 6, 7, 8, 9, 10, 11, 10, 10, 8, 8, 9, 11, 10, 12, 10, 11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen23 = (vector unsigned char)VINIT16( 9, 10, 10, 11, 11, 12, 11, 12, 9, 10, 11, 12, 12, 13, 12, 13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen24 = (vector unsigned char)VINIT16( 9, 9, 9, 10, 11, 12, 12, 12, 9, 9, 10, 11, 12, 12, 12, 12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen31 = (vector unsigned char)VINIT16( 4, 4, 6, 8, 9, 10, 10, 10, 4, 5, 6, 7, 9, 9, 10, 10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen32 = (vector unsigned char)VINIT16( 6, 6, 7, 8, 9, 10, 9, 10, 7, 7, 8, 8, 9, 10, 10, 10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen33 = (vector unsigned char)VINIT16( 8, 8, 9, 9, 10, 10, 10, 11, 9, 9, 10, 10, 10, 11, 10, 11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen34 = (vector unsigned char)VINIT16( 9, 9, 9, 10, 10, 11, 11, 12,10, 10, 10, 11, 11, 11, 11, 12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v31 = (vector unsigned char)VINIT16ALL(31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxlen = vec_splat_s32(8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_xor(vsum1,vsum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_xor(vsum2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_xor(vsum3,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //int *end2 = ix + 32*((int)(end - ix)/32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = (vector signed int)vec_mladd((vector unsigned short)v1,(vector unsigned short)vxlen,(vector unsigned short)v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = (vector signed int)vec_mladd((vector unsigned short)v3,(vector unsigned short)vxlen,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = (vector signed int)vec_pack(vx1,vx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = (vector signed int)vec_mladd((vector unsigned short)v5,(vector unsigned short)vxlen,(vector unsigned short)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = (vector signed int)vec_mladd((vector unsigned short)v7,(vector unsigned short)vxlen,(vector unsigned short)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = (vector signed int)vec_pack(vx4,vx5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = (vector unsigned char)vec_pack((vector unsigned short)vx3,(vector unsigned short)vx6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_perm(vhlen11,vhlen12,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_perm(vhlen13,vhlen14,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector signed int)vec_perm(vhlen21,vhlen22,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector signed int)vec_perm(vhlen23,vhlen24,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = (vector signed int)vec_perm(vhlen31,vhlen32,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = (vector signed int)vec_perm(vhlen33,vhlen34,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = (vector signed int)vec_cmpgt(vx,v31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = (vector signed char)vec_sel(v1,v2,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = (vector signed char)vec_sel(v3,v4,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = (vector signed char)vec_sel(v5,v6,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sum4s(vs1,vsum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sum4s(vs2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_sum4s(vs3,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sums(vsum1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sums(vsum2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_sums(vsum3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(vsum1,vsum1,vec_lvsr(4,&sum1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_perm(vsum2,vsum2,vec_lvsr(4,&sum2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_perm(vsum3,vsum3,vec_lvsr(4,&sum3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum1,0,&sum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum2,0,&sum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum3,0,&sum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x = ix[0] * xlen + ix[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ normal:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x = ix[0] * xlen + ix[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ end:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum3) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1+2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*************************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* choose table */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -599,12 +1451,21 @@ typedef int (*count_fnc)(const int* ix,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const count_fnc count_fncs[] =
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ { &count_bit_null
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from2_altivec1
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from2_altivec2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_altivec1
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_altivec1
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_altivec2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_altivec2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -621,7 +1482,11 @@ choose_table_nonMMX(const int *ix, const
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int* s = (unsigned int*)_s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int max;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int choice, choice2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ max = ix_max_vec(ix, end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ max = ix_max(ix, end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (max <= 15) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return count_fncs[max](ix, end, max, s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -643,7 +1508,11 @@ choose_table_nonMMX(const int *ix, const
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ break;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(ALTIVEC) && defined(ALTIVEC_970)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return count_bit_ESC_altivec(ix, end, choice, choice2, s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return count_bit_ESC(ix, end, choice, choice2, s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/util.c.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/util.c 2017-10-14 18:11:48.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -26,6 +26,12 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include &lt;config.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(ALTIVEC) && !defined(ALTIVEC_970)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include &lt;altivec.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include &lt;float.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -954,6 +960,108 @@ disable_FPE(void)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ***********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(ALTIVEC) && !defined(ALTIVEC_970)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline ieee754_float32_t fast_log10_altivec(ieee754_float32_t x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float va,vb,vc,vhalf,vzero,vsqrt2,vconst4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,vz,vz2,vlog;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vconst1,vconst2,vshamt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vconst3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float out __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = (vector float)VINIT4ALL(0.8685890659);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = (vector float)VINIT4ALL(0.2894672153);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc = (vector float)VINIT4ALL(0.1793365895);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = (vector float)VINIT4ALL(0.15051499783);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(1.4142135623731);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst4 = (vector float)VINIT4ALL(0.301029995664);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst1 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(9));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_nor(vconst2,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst3 = (vector signed int)vec_rl(vconst2,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vec_splat_u32(9),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vshamt,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_sl((vector unsigned int)vconst3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,&x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v1,v1,vec_lvsl(0,&x));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_splat(v2,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector float)vec_sel(vconst2,(vector unsigned int)v3,vconst1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz = vec_madd(v6, vec_madd(vec_nmsub(v7,v5,(vector float)vconst2),v7,v7), vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = (vector float)vec_sr((vector unsigned int)v3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_ctf(vec_sub((vector signed int)v8,vconst3),0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz2 = vec_madd(vz,vz,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vlog,vconst4,vhalf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vz2,vc,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vz2,v1,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vz,v2,vlog);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vlog,0,&out);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return out;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline ieee754_float32_t fast_loge_altivec(ieee754_float32_t x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float va,vb,vc,vhalf,vzero,vsqrt2,vconst4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,vz,vz2,vlog;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vconst1,vconst2,vshamt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vconst3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float out __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = (vector float)VINIT4ALL(2.0000006209);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = (vector float)VINIT4ALL(0.6664778517);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc = (vector float)VINIT4ALL(0.4139745860);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = (vector float)VINIT4ALL(0.34657359028);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(1.4142135623731);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst4 = (vector float)VINIT4ALL(0.6931471805599);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst1 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(9));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_nor(vconst2,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst3 = (vector signed int)vec_rl(vconst2,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vec_splat_u32(9),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vshamt,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_sl((vector unsigned int)vconst3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,&x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v1,v1,vec_lvsl(0,&x));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_splat(v2,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector float)vec_sel(vconst2,(vector unsigned int)v3,vconst1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz = vec_madd(v6, vec_madd(vec_nmsub(v7,v5,(vector float)vconst2),v7,v7), vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = (vector float)vec_sr((vector unsigned int)v3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_ctf(vec_sub((vector signed int)v8,vconst3),0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz2 = vec_madd(vz,vz,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vlog,vconst4,vhalf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vz2,vc,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vz2,v1,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vz,v2,vlog);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vlog,0,&out);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return out;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++init_log_table(void)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define LOG2_SIZE (512)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define LOG2_SIZE_L2 (9)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1004,6 +1112,8 @@ fast_log2(ieee754_float32_t x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return log2val;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #else /* Don't use FAST_LOG */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/util.h.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/util.h 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -93,10 +93,17 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* log/log10 approximations */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifdef USE_FAST_LOG
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(ALTIVEC) && !defined(ALTIVEC_970)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define FAST_LOG10(x) (fast_log10_altivec(x))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define FAST_LOG(x) (fast_loge_altivec(x))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define FAST_LOG10_X(x,y) (fast_log10_altivec(x)*(y))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define FAST_LOG_X(x,y) (fast_loge_altivec(x)*(y))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG10(x) (fast_log2(x)*(LOG2/LOG10))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG(x) (fast_log2(x)*LOG2)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG10_X(x,y) (fast_log2(x)*(LOG2/LOG10*(y)))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG_X(x,y) (fast_log2(x)*(LOG2*(y)))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG10(x) log10(x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG(x) log(x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -186,14 +193,14 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT masking_lower[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT masking_lower[CBANDS] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT minval[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT rnumlines[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT mld_cb[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT mld[Max(SBMAX_l,SBMAX_s)];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT bo_weight[Max(SBMAX_l,SBMAX_s)]; /* band weight long scalefactor bands, at transition */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT attack_threshold; /* short block tuning */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- int s3ind[CBANDS][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int s3ind[CBANDS][4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int numlines[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int bm[Max(SBMAX_l,SBMAX_s)];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int bo[Max(SBMAX_l,SBMAX_s)];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -219,7 +226,7 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT nb_l1[4][CBANDS], nb_l2[4][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT nb_l1[4][CBANDS] __attribute__ ((aligned (16))), nb_l2[4][CBANDS] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT nb_s1[4][CBANDS], nb_s2[4][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ III_psy_xmin thm[4];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -246,7 +253,7 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* variables used by encoder.c */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* variables for newmdct.c */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT sb_sample[2][2][18][SBLIMIT];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT sb_sample[2][2][18][SBLIMIT] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT amp_filter[32];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* variables used by util.c */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -293,7 +300,7 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifndef MFSIZE
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # define MFSIZE ( 3*1152 + ENCDELAY - MDCTDELAY )
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- sample_t mfbuf[2][MFSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sample_t mfbuf[2][MFSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mf_samples_to_encode;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mf_size;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -567,7 +574,12 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* log/log10 approximations */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ extern void init_log_table(void);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(ALTIVEC) && !defined(ALTIVEC_970)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ extern ieee754_float32_t fast_log10_altivec(ieee754_float32_t x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ extern ieee754_float32_t fast_loge_altivec(ieee754_float32_t x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ extern ieee754_float32_t fast_log2(ieee754_float32_t x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int isResamplingNecessary(SessionConfig_t const* cfg);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/vbrquantize.c.orig 2012-02-07 22:36:35.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/vbrquantize.c 2017-10-14 18:02:08.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -26,6 +26,12 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -217,8 +223,23 @@ k_34_4(DOUBLEX x[4], int l3[4])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static FLOAT
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, unsigned int bw, uint8_t sf)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float vpow[8] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v0, v1, v2, v3, v4, v5, v6,v7,v8,v9,v10,v11,v12,v13;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm1, vperm2,vc1,vc2,vc3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vl1,vl2,vl3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vxfsf, vsfpow, vsfpow34, vabs, vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int s1,s2,s3,s4,s5,s6,s7,s8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float const1 = (vector float)VINIT4(0.4053964553387788,3.404263724373839,5.465086767819913,1.0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float const2 = (vector float)VINIT4(7.719205369637751,10.93017829043677,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vmask1,vmask2,vmask3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm3,vperm4,vc4,vc5,vc6,vmask;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ DOUBLEX x[4];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- int l3[4];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int l3[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const FLOAT sfpow = pow20[sf + Q_MAX2]; /*pow(2.0,sf/4.0); */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const FLOAT sfpow34 = ipow20[sf]; /*pow(sfpow,-3.0/4.0); */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -226,6 +247,239 @@ calc_sfb_noise_x34(const FLOAT * xr, con
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int i = bw >> 2u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int const remaining = (bw & 0x03u);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[0] = sfpow;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[1] = sfpow34;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsfpow = vec_ld(0,vpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_xor(vxfsf,vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsfpow34 = vec_splat(vsfpow,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsfpow = vec_splat(vsfpow,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = vec_lvsl(0,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = vec_lvsl(0,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vabs = (vector float)vec_splat_s32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vabs = (vector float)vec_sl((vector unsigned int)vabs, (vector unsigned int)vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_sl(vc1,vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = (vector unsigned int)vec_splat_s32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_sro(vmask1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_sro(vmask1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask3 = vec_sro(vmask2,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm3 = (vector unsigned char)VINIT16(0,0,0,0,0,0,0,0,0,1,2,3,16,17,18,19);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_sld(vperm3,(vector unsigned char)vzero,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = (vector unsigned char)VINIT16ALL(16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; i > 1; i -= 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v2,v3,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v4,vsfpow34,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_madd(v5,vsfpow34,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_floor(v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_floor(v13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_splat(const1,2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_splat(const1,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_splat(const2,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_splat(const2,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v2,v4,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v3,v4,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v2,v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v3,v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_splat(const1,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_splat(const1,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v8,v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v9,v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v10,v2,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v11,v3,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_re(v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_nmsub(v10,v6,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_nmsub(v11,v7,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v10,v6,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v11,v7,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v8,v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v9,v11,v13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl1 = vec_cts(v10,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl2 = vec_cts(v11,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl3 = (vector signed int)vec_pack(vl1,vl2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vl3,0,l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = l3[0] >> 16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = l3[0] & 0xffff;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s3 = l3[1] >> 16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s4 = l3[1] & 0xffff;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s5 = l3[2] >> 16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s6 = l3[2] & 0xffff;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s7 = l3[3] >> 16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s8 = l3[3] & 0xffff;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+s1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+s2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+s3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+s4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v2,v2,vec_lvsl(0,pow43+s1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v3,v3,vec_lvsl(-4,pow43+s2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v4,v4,vec_lvsl(-8,pow43+s3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v5,vec_lvsl(-12,pow43+s4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sel(v2,v3,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sel(v12,v4,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sel(v12,v5,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+s5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+s6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+s7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+s8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v2,v2,vec_lvsl(0,pow43+s5));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v3,v3,vec_lvsl(-4,pow43+s6));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v4,v4,vec_lvsl(-8,pow43+s7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v5,vec_lvsl(-12,pow43+s8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sel(v2,v3,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sel(v13,v4,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sel(v13,v5,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_lvsl(0,pow43+s1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_lvsl(0,pow43+s2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_lvsl(0,pow43+s3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_lvsl(0,pow43+s4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_or(vc2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_or(vc4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+s1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+s2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+s3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+s4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_perm(vc1,vc2,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc6 = vec_perm(vc3,vc4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v3,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v4,v5,vc6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sld(v6,v7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_lvsl(0,pow43+s5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_lvsl(0,pow43+s6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_lvsl(0,pow43+s7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_lvsl(0,pow43+s8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_or(vc2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_or(vc4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+s5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+s6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+s7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+s8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_perm(vc1,vc2,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc6 = vec_perm(vc3,vc4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v3,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v4,v5,vc6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sld(v6,v7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16, xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32, xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v0,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v2,v3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_andc(v6,vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_andc(v7,vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_nmsub(vsfpow, v12, v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_nmsub(vsfpow, v13, v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_madd(v10, v10, vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_madd(v11, v11, vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr34 += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (i) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef ALTIVEC_970
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[0] = sfpow34 * xr34[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[1] = sfpow34 * xr34[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[2] = sfpow34 * xr34[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[3] = sfpow34 * xr34[3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ k_34_4(x, l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[0] = pow43[l3[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[1] = pow43[l3[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[2] = pow43[l3[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[3] = pow43[l3[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0, vpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16, xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v0,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_andc(v3,vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_nmsub(vsfpow, v1, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_madd(v5, v5, vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v1,v2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(v3,vsfpow34,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl1 = vec_cts(v4,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vl1,0,l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,adj43+l3[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_lde(0,adj43+l3[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_lde(0,adj43+l3[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_lde(0,adj43+l3[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v5,v5,vec_lvsl(0,adj43+l3[0]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v6,v6,vec_lvsl(-4,adj43+l3[1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v7,v7,vec_lvsl(-8,adj43+l3[2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v8,v8,vec_lvsl(-12,adj43+l3[3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_or(v9,v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_or(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_or(v9,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_add(v4,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl1 = vec_cts(v10,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vl1,0,l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+l3[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+l3[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+l3[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+l3[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v2,vec_lvsl(0,pow43+l3[0]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v3,vec_lvsl(-4,pow43+l3[1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v4,v4,vec_lvsl(-8,pow43+l3[2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v5,v5,vec_lvsl(-12,pow43+l3[3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16, xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v0,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_andc(v3,vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_nmsub(vsfpow, v6, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_madd(v5, v5, vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr34 += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (remaining) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[0] = x[1] = x[2] = x[3] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ switch( remaining ) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 3: x[2] = sfpow34 * xr34[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 2: x[1] = sfpow34 * xr34[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 1: x[0] = sfpow34 * xr34[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ k_34_4(x, l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[0] = x[1] = x[2] = x[3] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ switch( remaining ) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 3: x[2] = fabsf(xr[2]) - sfpow * pow43[l3[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 2: x[1] = fabsf(xr[1]) - sfpow * pow43[l3[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 1: x[0] = fabsf(xr[0]) - sfpow * pow43[l3[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xfsf += (x[0] * x[0] + x[1] * x[1]) + (x[2] * x[2] + x[3] * x[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vxfsf,0,vpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return xfsf + vpow[0] + vpow[1] + vpow[2] + vpow[3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (i-- > 0) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[0] = sfpow34 * xr34[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[1] = sfpow34 * xr34[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -262,6 +516,7 @@ calc_sfb_noise_x34(const FLOAT * xr, con
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ xfsf += (x[0] * x[0] + x[1] * x[1]) + (x[2] * x[2] + x[3] * x[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return xfsf;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span>\ No newline at end of file
<span style='display:block; white-space:pre;color:#808080;'>diff --git a/audio/lame/files/lame-3.100-altivec-20171217.diff b/audio/lame/files/lame-3.100-altivec-20171217.diff
</span>new file mode 100644
<span style='display:block; white-space:pre;color:#808080;'>index 00000000000..cbb948486e9
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>--- /dev/null
</span><span style='display:block; white-space:pre;background:#e0e0ff;'>+++ b/audio/lame/files/lame-3.100-altivec-20171217.diff
</span><span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -0,0 +1,4263 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+# https://github.com/classilla/lamevmx
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+# See also: https://tmkk.undo.jp/lame/lame-3.100-altivec-20171014.diff
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git README.md README.md
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+new file mode 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 0000000..a82835a
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- /dev/null
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ README.md
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -0,0 +1,14 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++# [LAMEVMX: LAME Ain't an MP3 Encoder with VMX](http://www.floodgap.com/software/lamevmx/)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++A PowerPC-optimized build of LAME 3.100 with [tmkk's patches for AltiVec](http://tmkk.undo.jp/lame/index_e.html), enhanced with additional G5 optimizations and build-system fixes. Intended for lovely Power Macs and not icky Intel Macs, which are better served by the mainline build. Maintained by Cameron Kaiser (classilla@floodgap.com).
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++How to build (GNU `make` from MacPorts strongly recommended):
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++* Have a 10.4 system with Xcode 2.5. (It may or may not work on 10.5 with Xcode 3. It probably doesn't work on 10.6. It will *not* work on 10.7+.)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++* Clone it.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++* `./configure`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++* `make` or `gmake`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++You will have a three-headed multi-architecture binary in `frontend/lame` with versions for G3, G4 and G5 processors. The same binary runs on all systems. Do `gmake test` for a quick test of functionality.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++On my Quad G5 (2.5GHz), LAMEVMX achieves approximately 25x playback speed at peak.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git configure configure
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 52dbf02..1e34a9b 100755
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- configure
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ configure
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -7616,6 +7616,7 @@ IFS=$as_save_IFS
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fi
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fi
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ac_cv_prog_ac_ct_NMEDIT="true" # doesn't work right on 10.4
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if test -n "$ac_ct_NMEDIT"; then
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -8003,6 +8004,7 @@ if ac_fn_c_try_link "$LINENO"; then :
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ lt_cv_ld_exported_symbols_list=no
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fi
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++lt_cv_ld_exported_symbols_list=no # doesn't work right on 10.4 ld
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ rm -f core conftest.err conftest.$ac_objext \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ conftest$ac_exeext conftest.$ac_ext
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ LDFLAGS=$save_LDFLAGS
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -17438,7 +17440,6 @@ if test "x$HAVE_GCC" = "xyes" -o "x$HAVE_CLANG" = "xyes"; then
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ OPTIMIZATION_NORM="-fschedule-insns2"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fi
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # generic CPU specific options
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ case ${host_cpu} in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ sparc)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -17604,9 +17605,7 @@ else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ $as_echo "no" >&6; }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fi
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++OPTIMIZATION="-arch ppc750 -arch ppc7400 -arch ppc970 -O3 -fomit-frame-pointer -ffast-math -funroll-loops -isysroot @SYSROOT@"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for debug options" >&5
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ $as_echo_n "checking for debug options... " >&6; }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git frontend/Makefile.in frontend/Makefile.in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 4f15e55..261d7dd 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- frontend/Makefile.in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ frontend/Makefile.in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -464,17 +464,18 @@ clean-binPROGRAMS:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ echo " rm -f" $$list; \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ rm -f $$list
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++# The Universal build does not work against the ar-static libs.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ lame$(EXEEXT): $(lame_OBJECTS) $(lame_DEPENDENCIES) $(EXTRA_lame_DEPENDENCIES)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @rm -f lame$(EXEEXT)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- $(AM_V_CCLD)$(LINK) $(lame_OBJECTS) $(lame_LDADD) $(LIBS)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ $(AM_V_CCLD)$(LINK) $(lame_OBJECTS) ../libmp3lame/.libs/*.o ../mpglib/*.o -lncurses -liconv -lm
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mp3rtp$(EXEEXT): $(mp3rtp_OBJECTS) $(mp3rtp_DEPENDENCIES) $(EXTRA_mp3rtp_DEPENDENCIES)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @rm -f mp3rtp$(EXEEXT)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- $(AM_V_CCLD)$(LINK) $(mp3rtp_OBJECTS) $(mp3rtp_LDADD) $(LIBS)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ $(AM_V_CCLD)$(LINK) $(mp3rtp_OBJECTS) ../libmp3lame/.libs/*.o ../mpglib/*.o -lncurses -liconv -lm
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mp3x$(EXEEXT): $(mp3x_OBJECTS) $(mp3x_DEPENDENCIES) $(EXTRA_mp3x_DEPENDENCIES)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @rm -f mp3x$(EXEEXT)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- $(AM_V_CCLD)$(LINK) $(mp3x_OBJECTS) $(mp3x_LDADD) $(LIBS)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ $(AM_V_CCLD)$(LINK) $(mp3x_OBJECTS) ../libmp3lame/.libs/*.o ../mpglib/*.o -lncurses -liconv -lm
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mostlyclean-compile:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -rm -f *.$(OBJEXT)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -497,22 +498,22 @@ distclean-compile:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timestatus.Po@am__quote@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ .c.o:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ .c.obj:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ .c.lo:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git frontend/parse.c frontend/parse.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 752613f..6e1db2f 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- frontend/parse.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ frontend/parse.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -99,6 +99,22 @@ char *strchr(), *strrchr();
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int const lame_alpha_version_enabled = LAME_ALPHA_VERSION;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int const internal_opts_enabled = INTERNAL_OPTS;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++/* 10.4 does not implement strnlen(), so ... */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++/* Find the length of S, but scan at most MAXLEN characters. If no '\0'
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ terminator is found within the first MAXLEN characters, return MAXLEN. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++size_t
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++strnlen (s, maxlen)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ register const char *s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ size_t maxlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ register const char *e;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ size_t n;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (e = s, n = 0; *e && n < maxlen; e++, n++)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return n;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* GLOBAL VARIABLES. set by parse_args() */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* we need to clean this up */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/Makefile.in libmp3lame/Makefile.in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 5437b38..27acde2 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/Makefile.in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/Makefile.in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -577,22 +577,22 @@ distclean-compile:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/version.Plo@am__quote@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ .c.o:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ .c.obj:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ .c.lo:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/fft.c libmp3lame/fft.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 4eea1ad..19863d1 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/fft.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/fft.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -38,6 +38,10 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include &lt;config.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include &lt;altivec.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "encoder.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -66,6 +70,17 @@ fht(FLOAT * fz, int n)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT *fi, *gi;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const *fn;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float csvec[16] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vfi0,vfi1,vfi2,vfi3,vgi0,vgi1,vgi2,vgi3,vf0,vf1,vf2,vf3,vg0,vg1,vg2,vg3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vprev1,vprev2,vprev3,vprev4,vc1,vc2,vs1,vs2,vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm1,vperm2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(16,17,18,19,12,13,14,15,8,9,10,11,4,5,6,7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(16,17,18,19,4,5,6,7,8,9,10,11,12,13,14,15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ n <<= 1; /* to get BLKSIZE, because of 3DNow! ASM routine */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fn = fz + n;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -103,6 +118,238 @@ fht(FLOAT * fz, int n)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } while (fi < fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ c1 = tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ s1 = tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(kx < 4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 1; i < kx; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT c2, s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = 1 - (2 * s1) * s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = (2 * s1) * c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi = fz + i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi = fz + k1 - i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT a, b, g0, f0, f1, g1, f2, g2, f3, g3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b = s2 * fi[k1] - c2 * gi[k1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ a = c2 * fi[k1] + s2 * gi[k1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ f1 = fi[0] - a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ f0 = fi[0] + a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ g1 = gi[0] - b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ g0 = gi[0] + b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b = s2 * fi[k3] - c2 * gi[k3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ a = c2 * fi[k3] + s2 * gi[k3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ f3 = fi[k2] - a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ f2 = fi[k2] + a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ g3 = gi[k2] - b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ g2 = gi[k2] + b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b = s1 * f2 - c1 * g3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ a = c1 * f2 + s1 * g3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi[k2] = f0 - a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi[0] = f0 + a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi[k3] = g1 - b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi[k1] = g1 + b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b = c1 * g2 - s1 * f3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ a = s1 * g2 + c1 * f3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi[k2] = g0 - a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi[0] = g0 + a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi[k3] = f1 - b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi[k1] = f1 + b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (fi < fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT c2, s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(i = 1; i < 4; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = 1 - (2*s1)*s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = (2*s1)*c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[i] = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[i+4] = c2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[i+8] = s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[i+12] = s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_ld(0,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_ld(16,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = vec_ld(32,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = vec_ld(48,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi = fz;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi = fz + k1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi0 = vec_ld(0,fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi1 = vec_ld(0,fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi2 = vec_ld(0,fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi3 = vec_ld(0,fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev1 = vec_ld(0,gi-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev2 = vec_ld(0,gi+k1-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev3 = vec_ld(0,gi+k2-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev4 = vec_ld(0,gi+k3-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vec_perm(vprev1,vprev1,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vec_perm(vprev2,vprev2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vec_perm(vprev3,vprev3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vec_perm(vprev4,vprev4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vfi1,vc2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vfi1,vs2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vfi3,vc2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(vfi3,vs2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(vgi1,vs2,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_nmsub(vgi1,vc2,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(vgi3,vs2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_nmsub(vgi3,vc2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf0 = vec_add(vfi0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_sub(vfi0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg0 = vec_add(vgi0,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg1 = vec_sub(vgi0,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_add(vfi2,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sub(vfi2,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg2 = vec_add(vgi2,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg3 = vec_sub(vgi2,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vf2,vc1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vf2,vs1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vg2,vs1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(vg2,vc1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(vg3,vs1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_nmsub(vg3,vc1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(vf3,vc1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_nmsub(vf3,vs1,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_add(vf0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_sub(vf0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(vg1,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sub(vg1,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_add(vg0,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_sub(vg0,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vf1,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_sub(vf1,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,vfi0,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v10,vfi2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v15,vfi1,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v16,vfi3,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v1,0,fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v2,0,fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v3,0,fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v4,0,fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v11,vprev2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v12,vprev4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v13,vprev1,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v14,vprev3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v1,0,gi+k1-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v2,0,gi+k3-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v3,0,gi-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v4,0,gi+k2-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (fi<fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* rest loop */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 4; i < kx; i+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < 4; j++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = 1 - (2*s1)*s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = (2*s1)*c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[j] = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[j+4] = c2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[j+8] = s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ csvec[j+12] = s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_ld(0,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_ld(16,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = vec_ld(32,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = vec_ld(48,csvec);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi = fz + i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi = fz + k1 - i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi0 = vec_ld(0,fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi1 = vec_ld(0,fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi2 = vec_ld(0,fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi3 = vec_ld(0,fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev1 = vec_ld(0,gi-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,gi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev2 = vec_ld(0,gi+k1-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,gi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev3 = vec_ld(0,gi+k2-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,gi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev4 = vec_ld(0,gi+k3-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,gi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vec_perm(vprev1,v1,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vec_perm(vprev2,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vec_perm(vprev3,v3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vec_perm(vprev4,v4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vfi1,vc2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vfi1,vs2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vfi3,vc2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(vfi3,vs2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(vgi1,vs2,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_nmsub(vgi1,vc2,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(vgi3,vs2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_nmsub(vgi3,vc2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf0 = vec_add(vfi0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_sub(vfi0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg0 = vec_add(vgi0,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg1 = vec_sub(vgi0,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_add(vfi2,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sub(vfi2,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg2 = vec_add(vgi2,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg3 = vec_sub(vgi2,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vf2,vc1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vf2,vs1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vg2,vs1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(vg2,vc1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(vg3,vs1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_nmsub(vg3,vc1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(vf3,vc1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_nmsub(vf3,vs1,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_add(vf0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_sub(vf0,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(vg1,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sub(vg1,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_add(vg0,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_sub(vg0,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vf1,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_sub(vf1,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,0,fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v15,0,fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v16,0,fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v11,vprev2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v12,vprev4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v13,vprev1,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v14,vprev3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v1,0,gi+k1-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v11,0,gi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v2,0,gi+k3-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v12,0,gi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v3,0,gi-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v13,0,gi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v4,0,gi+k2-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v14,0,gi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (fi<fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 1; i < kx; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT c2, s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ c2 = 1 - (2 * s1) * s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -142,6 +389,7 @@ fht(FLOAT * fz, int n)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ tri += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } while (k4 < n);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/gain_analysis.c libmp3lame/gain_analysis.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index c94db78..c41c770 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/gain_analysis.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/gain_analysis.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -92,6 +92,10 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <stdio.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <stdlib.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <string.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -109,6 +113,67 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*lint -save -e736 loss of precision */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const Float_t ABYule[9][2 * YULE_ORDER + 1 + 3] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.03857599435200, -3.84664617118067, -0.02160367184185, 7.81501653005538, -0.00123395316851,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -11.34170355132042, -0.00009291677959, 13.05504219327545, -0.01655260341619,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -12.28759895145294, 0.02161526843274, 9.48293806319790, -0.02074045215285, -5.87257861775999,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.00594298065125, 2.75465861874613, 0.00306428023191, -0.86984376593551, 0.00012025322027,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.13919314567432, 0.00288463683916, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.05418656406430, -3.47845948550071, -0.02911007808948, 6.36317777566148, -0.00848709379851,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -8.54751527471874, -0.00851165645469, 9.47693607801280, -0.00834990904936, -8.81498681370155,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.02245293253339, 6.85401540936998, -0.02596338512915, -4.39470996079559, 0.01624864962975,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 2.19611684890774, -0.00240879051584, -0.75104302451432, 0.00674613682247, 0.13149317958808,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00187763777362, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.15457299681924, -2.37898834973084, -0.09331049056315, 2.84868151156327, -0.06247880153653,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -2.64577170229825, 0.02163541888798, 2.23697657451713, -0.05588393329856, -1.67148153367602,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.04781476674921, 1.00595954808547, 0.00222312597743, -0.45953458054983, 0.03174092540049,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.16378164858596, -0.01390589421898, -0.05032077717131, 0.00651420667831, 0.02347897407020,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00881362733839, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.30296907319327, -1.61273165137247, -0.22613988682123, 1.07977492259970, -0.08587323730772,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.25656257754070, 0.03282930172664, -0.16276719120440, -0.00915702933434, -0.22638893773906,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02364141202522, 0.39120800788284, -0.00584456039913, -0.22138138954925, 0.06276101321749,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.04500235387352, -0.00000828086748, 0.02005851806501, 0.00205861885564, 0.00302439095741,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02950134983287, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.33642304856132, -1.49858979367799, -0.25572241425570, 0.87350271418188, -0.11828570177555,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.12205022308084, 0.11921148675203, -0.80774944671438, -0.07834489609479, 0.47854794562326,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00469977914380, -0.12453458140019, -0.00589500224440, -0.04067510197014, 0.05724228140351,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.08333755284107, 0.00832043980773, -0.04237348025746, -0.01635381384540, 0.02977207319925,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.01760176568150, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.44915256608450, -0.62820619233671, -0.14351757464547, 0.29661783706366, -0.22784394429749,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.37256372942400, -0.01419140100551, 0.00213767857124, 0.04078262797139, -0.42029820170918,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.12398163381748, 0.22199650564824, 0.04097565135648, 0.00613424350682, 0.10478503600251,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.06747620744683, -0.01863887810927, 0.05784820375801, -0.03193428438915, 0.03222754072173,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.00541907748707, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.56619470757641, -1.04800335126349, -0.75464456939302, 0.29156311971249, 0.16242137742230,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.26806001042947, 0.16744243493672, 0.00819999645858, -0.18901604199609, 0.45054734505008,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.30931782841830, -0.33032403314006, -0.27562961986224, 0.06739368333110, 0.00647310677246,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.04784254229033, 0.08647503780351, 0.01639907836189, -0.03788984554840, 0.01807364323573,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00588215443421, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.58100494960553, -0.51035327095184, -0.53174909058578, -0.31863563325245, -0.14289799034253,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.20256413484477, 0.17520704835522, 0.14728154134330, 0.02377945217615, 0.38952639978999,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.15558449135573, -0.23313271880868, -0.25344790059353, -0.05246019024463, 0.01628462406333,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02505961724053, 0.06920467763959, 0.02442357316099, -0.03721611395801, 0.01818801111503,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00749618797172, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.53648789255105, -0.25049871956020, -0.42163034350696, -0.43193942311114, -0.00275953611929,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.03424681017675, 0.04267842219415, -0.04678328784242, -0.10214864179676, 0.26408300200955,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.14590772289388, 0.15113130533216, -0.02459864859345, -0.17556493366449, -0.11202315195388,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.18823009262115, -0.04060034127000, 0.05477720428674, 0.04788665548180, 0.04704409688120,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02217936801134, 0.0, 0.0, 0.0}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const Float_t ABButter[9][2 * BUTTER_ORDER + 1 + 3] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.98621192462708, -1.97223372919527, -1.97242384925416, 0.97261396931306, 0.98621192462708, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.98500175787242, -1.96977855582618, -1.97000351574484, 0.97022847566350, 0.98500175787242, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97938932735214, -1.95835380975398, -1.95877865470428, 0.95920349965459, 0.97938932735214, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97531843204928, -1.95002759149878, -1.95063686409857, 0.95124613669835, 0.97531843204928, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97316523498161, -1.94561023566527, -1.94633046996323, 0.94705070426118, 0.97316523498161, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.96454515552826, -1.92783286977036, -1.92909031105652, 0.93034775234268, 0.96454515552826, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.96009142950541, -1.91858953033784, -1.92018285901082, 0.92177618768381, 0.96009142950541, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.95856916599601, -1.91542108074780, -1.91713833199203, 0.91885558323625, 0.95856916599601, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.94597685600279, -1.88903307939452, -1.89195371200558, 0.89487434461664, 0.94597685600279, 0.0, 0.0, 0.0}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const Float_t ABYule[9][multiple_of(4, 2 * YULE_ORDER + 1)] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* 20 18 16 14 12 10 8 6 4 2 0 19 17 15 13 11 9 7 5 3 1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ { 0.00288463683916, 0.00012025322027, 0.00306428023191, 0.00594298065125, -0.02074045215285, 0.02161526843274, -0.01655260341619, -0.00009291677959, -0.00123395316851, -0.02160367184185, 0.03857599435200, 0.13919314567432, -0.86984376593551, 2.75465861874613, -5.87257861775999, 9.48293806319790,-12.28759895145294, 13.05504219327545,-11.34170355132042, 7.81501653005538, -3.84664617118067},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -134,6 +199,7 @@ static const Float_t ABButter[9][multiple_of(4, 2 * BUTTER_ORDER + 1)] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0.95856916599601, 0.91885558323625, -1.91713833199203, -1.91542108074780, 0.95856916599601},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0.94597685600279, 0.89487434461664, -1.89195371200558, -1.88903307939452, 0.94597685600279}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*lint -restore */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -143,6 +209,191 @@ static const Float_t ABButter[9][multiple_of(4, 2 * BUTTER_ORDER + 1)] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* When calling this procedure, make sure that ip[-order] and op[-order] point to real data! */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++filterIntegrated (const Float_t* input, Float_t* output, Float_t* output2, size_t nSamples, const Float_t* kernel, const Float_t* kernel2)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,vbase;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vmask1,vmask2,vout1,vout2,vout3,vout4,vzero,vkernel1,vkernel2,vkernel3,vkernel4,vkernel5,vkernel6,vkernel7,vkernel8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vo1, vo2, vo3, vo4, vi2, vi3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vc1,vc2,vc3,vc4,vc5,vperm1,vperm2,vperm4,vperm5,vperm6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbase = (vector float)VINIT4ALL(1e-10f);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(24,25,26,27,16,17,18,19,8,9,10,11,0,1,2,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(28,29,30,31,20,21,22,23,12,13,14,15,4,5,6,7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_sl(vc1,vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_sl(vc3,vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_or(vc3,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector float)vec_splat_s32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_sro(v1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_sro(v1,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(48,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(64,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(80,kernel);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel1 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel2 = vec_perm(v1,v2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel3 = vec_perm(v3,v4,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel4 = vec_perm(v3,v4,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel5 = vec_perm(v5,v6,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel6 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel5 = vec_and(vkernel5,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel6 = vec_and(vkernel6,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,kernel2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,kernel2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel7 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel8 = vec_perm(v1,v2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel7 = vec_and(vkernel7,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkernel8 = vec_and(vkernel8,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_lvsl(0,input-7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = vec_lvsl(0,output-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,input-7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,input-7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,input-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(15,input-11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi2 = vec_perm(v2,v1,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vec_perm(v3,v4,vec_lvsl(0,input-10));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vec_sro(vi3,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,output-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,output-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,output-8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,output-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(15,output-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo1 = vec_perm(v2,v1,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo2 = vec_perm(v3,v2,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vec_perm(v4,v5,vec_lvsl(0,output-10));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vec_sro(vo3,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,output2-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,output2-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo4 = vec_perm(v2,v1,vec_lvsl(0,output2-2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo4 = vec_sro(vo4,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_lvsl(0,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = vec_lvsr(0,output);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* 1st loop */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v3,v1,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(v5,vkernel1,vbase);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo1,vkernel2,vbase);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(vi2,vkernel3,vout1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo2,vkernel4,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(vi3,vkernel5,vout1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo3,vkernel6,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vec_sld(vi3,vi2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi2 = vec_sld(vi2,v5,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_sub(vout1,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_slo(vout1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_slo(vout1,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_slo(vout1,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_add(vout1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_add(vout1,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vec_sld(vo3,vo2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo2 = vec_sld(vo2,vo1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo1 = vec_sld(vo1,vout1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_perm(vout1,vout1,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vout2,0,output);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++output;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++input;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ --nSamples;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while(nSamples--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_lvsl(0,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = vec_lvsr(0,output);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm6 = vec_lvsr(0,output2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(15,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,input-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v3,v1,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(v5,vkernel1,vbase);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo1,vkernel2,vbase);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(vi2,vkernel3,vout1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo2,vkernel4,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_madd(vi3,vkernel5,vout1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_madd(vo3,vkernel6,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_nmsub(vo4,vkernel8,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_madd(vo1,vkernel7,vout3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vec_sld(vi3,vi2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi2 = vec_sld(vi2,v5,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_sub(vout1,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_slo(vout1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_slo(vout1,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_slo(vout1,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_add(vout1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout1 = vec_add(vout1,vout2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vec_sld(vo3,vo2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo2 = vec_sld(vo2,vo1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo1 = vec_sld(vo1,vout1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_slo(vout4,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_slo(vout4,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_slo(vout4,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_add(vout4,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_add(v5,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_add(vout3,vout4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo4 = vec_sld(vo4,vout3,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout2 = vec_perm(vout1,vout1,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_perm(vout3,vout3,vperm6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vout2,0,output);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vout4,0,output2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++output;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++output2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++input;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm6 = vec_lvsr(0,output2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_nmsub(vo4,vkernel8,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_madd(vo1,vkernel7,vout3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_slo(vout4,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_slo(vout4,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_slo(vout4,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_add(vout4,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout3 = vec_add(vout3,vout4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout4 = vec_perm(vout3,vout3,vperm6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vout4,0,output2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ filterYule(const Float_t * input, Float_t * output, size_t nSamples, const Float_t * const kernel)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -189,7 +440,7 @@ filterButter(const Float_t * input, Float_t * output, size_t nSamples, const Flo
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int ResetSampleFrequency(replaygain_t * rgData, long samplefreq);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -323,6 +574,10 @@ AnalyzeSamples(replaygain_t * rgData, const Float_t * left_samples, const Float_
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ curright = right_samples + cursamplepos;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ filterIntegrated(curleft, rgData->lstep + rgData->totsamp, rgData->lout + rgData->totsamp, cursamples, ABYule[rgData->freqindex], ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ filterIntegrated(curright, rgData->rstep + rgData->totsamp, rgData->rout + rgData->totsamp, cursamples, ABYule[rgData->freqindex], ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ YULE_FILTER(curleft, rgData->lstep + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ABYule[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ YULE_FILTER(curright, rgData->rstep + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -332,6 +587,7 @@ AnalyzeSamples(replaygain_t * rgData, const Float_t * left_samples, const Float_
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ BUTTER_FILTER(rgData->rstep + rgData->totsamp, rgData->rout + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ curleft = rgData->lout + rgData->totsamp; /* Get the squared values */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ curright = rgData->rout + rgData->totsamp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/lame.c libmp3lame/lame.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index cb82225..4b3290d 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/lame.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/lame.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -30,6 +30,9 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -603,7 +606,12 @@ lame_init_params(lame_global_flags * gfp)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->CPU_features.SSE = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->CPU_features.SSE2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* turn off JAVA mode explicitly */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned short vscr = vec_mfvscr();
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vscr = vec_or(vscr,(vector unsigned short)VINIT8(0,0,0,0,0,0,1,0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_mtvscr(vscr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ cfg->vbr = gfp->VBR;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ cfg->error_protection = gfp->error_protection;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/machine.h libmp3lame/machine.h
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index bf6fff2..4fc1e70 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/machine.h
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/machine.h
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -184,6 +184,24 @@ typedef FLOAT sample_t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifdef __APPLE_CC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT4(a,b,c,d) (a,b,c,d)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT8(a,b,c,d,e,f,g,h) (a,b,c,d,e,f,g,h)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT16(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) (a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT4ALL(a) (a,a,a,a)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT8ALL(a) (a,a,a,a,a,a,a,a)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT16ALL(a) (a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT4(a,b,c,d) {a,b,c,d}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT8(a,b,c,d,e,f,g,h) {a,b,c,d,e,f,g,h}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT16(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT4ALL(a) {a,a,a,a}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT8ALL(a) {a,a,a,a,a,a,a,a}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define VINIT16ALL(a) {a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* end of machine.h */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/newmdct.c libmp3lame/newmdct.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 596cac9..328c38b 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/newmdct.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/newmdct.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -30,6 +30,10 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "encoder.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -39,7 +43,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifndef USE_GOGO_SUBBAND
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const FLOAT enwindow[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const FLOAT enwindow[] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -4.77e-07 * 0.740951125354959 / 2.384e-06, 1.03951e-04 * 0.740951125354959 / 2.384e-06,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 9.53674e-04 * 0.740951125354959 / 2.384e-06, 2.841473e-03 * 0.740951125354959 / 2.384e-06,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 3.5758972e-02 * 0.740951125354959 / 2.384e-06, 3.401756e-03 * 0.740951125354959 / 2.384e-06, 9.83715e-04 * 0.740951125354959 / 2.384e-06, 9.9182e-05 * 0.740951125354959 / 2.384e-06, /* 15 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -230,7 +234,7 @@ static const FLOAT enwindow[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define NS 12
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define NL 36
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const FLOAT win[4][NL] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const FLOAT win[4][NL] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 2.382191739347913e-13,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 6.423305872147834e-13,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -435,6 +439,443 @@ window_subband(const sample_t * x1, FLOAT a[SBLIMIT])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const sample_t *x2 = &x1[238 - 14 - 286];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vw1,vw2,vw3,vw4,vw5,vw6,vw7,vw8,vs,vt,vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm2,vperm3,vperm4,vperm5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = (vector unsigned char)VINIT16(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = vec_lvsl(0,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm3 = (vector unsigned char)VINIT16(0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_lvsl(0,x1+1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_perm(vperm4,vperm4,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(i=0;i<3;i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wp-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,wp-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(16,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(32,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+26);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wp+26);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(16,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(32,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v6,v7,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v8,v9,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_mergeh(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_mergeh(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergel(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergel(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_mergel(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_mergel(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw5 = vec_mergeh(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw6 = vec_mergel(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw3 = vec_mergeh(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw7 = vec_mergeh(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw4 = vec_mergel(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw8 = vec_mergel(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-224);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw1,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+221);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+221);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw1,v6,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-160);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw2,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+157);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+157);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw2,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-96);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw3,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+93);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+93);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw3,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw4,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+29);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+29);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw4,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw5,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-35);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-35);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw5,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+96);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw6,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-99);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-99);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw6,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+160);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw7,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-163);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-163);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw7,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+224);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw8,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-227);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-227);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw8,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wp-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,wp-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(16,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(32,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wp+34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(16,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(32,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v6,v7,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v8,v9,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_mergeh(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_mergeh(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergel(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergel(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_mergel(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_mergel(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw5 = vec_mergeh(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw6 = vec_mergel(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw3 = vec_mergeh(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw7 = vec_mergeh(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw4 = vec_mergel(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw8 = vec_mergel(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+256);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw1,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-259);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-259);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw1,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+192);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw2,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-195);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-195);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw2,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+128);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw3,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-131);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-131);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw3,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+64);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw4,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-67);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-67);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw4,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw5,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw5,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-64);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw6,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+61);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+61);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw6,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-128);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw7,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+125);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+125);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw7,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-192);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw8,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+189);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+189);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw8,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*end*/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,wp+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16,wp+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v4,v5,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(0,wp+42);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,wp+60);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_ld(16,wp+60);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v10,v11,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergeh(v3,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergeh(v6,v12);;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vs,vw1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sub(vt,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(vt,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vw2,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_mergeh(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_mergel(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v4,0,a+i*8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v5,16,a+i*8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wp += 72;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1-=4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2+=4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wp-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,wp-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(16,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(32,wp+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+26);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wp+26);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(16,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(32,wp+44);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v6,v7,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v8,v9,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_mergeh(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_mergeh(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergel(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergel(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_mergel(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_mergel(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw5 = vec_mergeh(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw6 = vec_mergel(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw3 = vec_mergeh(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw7 = vec_mergeh(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw4 = vec_mergel(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw8 = vec_mergel(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-224);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw1,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+221);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+221);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw1,v6,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-160);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw2,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+157);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+157);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw2,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-96);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw3,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+93);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+93);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw3,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw4,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+29);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+29);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw4,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw5,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-35);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-35);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw5,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+96);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw6,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-99);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-99);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw6,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+160);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw7,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-163);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-163);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw7,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+224);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw8,v3,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-227);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-227);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_madd(vw8,v6,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wp-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,wp-2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(16,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(32,wp+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wp+34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(16,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(32,wp+52);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v6,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v6,v7,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v8,v9,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_mergeh(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_mergeh(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergel(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergel(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_mergel(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_mergel(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw5 = vec_mergeh(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw6 = vec_mergel(v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw3 = vec_mergeh(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw7 = vec_mergeh(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw4 = vec_mergel(v13,v15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw8 = vec_mergel(v14,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+256);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw1,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-259);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-259);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw1,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+192);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw2,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-195);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-195);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw2,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+128);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw3,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-131);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-131);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw3,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2+64);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw4,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-67);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-67);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw4,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw5,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw5,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-64);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw6,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+61);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+61);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw6,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-128);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw7,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+125);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+125);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw7,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,x2-192);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vec_nmsub(vw8,v3,vt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,x1+189);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,x1+189);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v5,v4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vw8,v6,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*end*/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,wp+6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,wp+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16,wp+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v4,v5,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_ld(0,wp+42);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,wp+60);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_ld(16,wp+60);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v10,v11,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_mergeh(v3,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_mergeh(v6,v12);;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw1 = vec_mergeh(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vec_mergel(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vec_madd(vs,vw1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sub(vt,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(vt,vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(vw2,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,a+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_mergeh(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_mergel(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v6,v4,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v5,0,a+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v7,16,a+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wp += 54;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1-=3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2+=3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = -15; i < 0; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT w, s, t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -501,6 +942,7 @@ window_subband(const sample_t * x1, FLOAT a[SBLIMIT])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x1--;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x2++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT s, t, u, v;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ t = x1[-16] * wp[-10];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/psymodel.c libmp3lame/psymodel.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 60076ee..a168605 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/psymodel.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/psymodel.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -146,6 +146,10 @@ blocktype_d[2] block type to use for previous granule
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <float.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "encoder.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -164,6 +168,48 @@ blocktype_d[2] block type to use for previous granule
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define LN_TO_LOG10 0.2302585093
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline vector float fast_log10_altivec_2(vector float v3)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float va,vb,vc,vhalf,vzero,vsqrt2,vconst4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v4,v5,v6,v7,v8,vz,vz2,vlog;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vconst1,vconst2,vshamt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vconst3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = (vector float)VINIT4ALL(0.8685890659);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = (vector float)VINIT4ALL(0.2894672153);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc = (vector float)VINIT4ALL(0.1793365895);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = (vector float)VINIT4ALL(0.15051499783);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(1.4142135623731);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst4 = (vector float)VINIT4ALL(0.301029995664);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst1 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(9));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_nor(vconst2,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst3 = (vector signed int)vec_rl(vconst2,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vec_splat_u32(9),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vshamt,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_sl((vector unsigned int)vconst3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector float)vec_sel(vconst2,(vector unsigned int)v3,vconst1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz = vec_madd(v6, vec_madd(vec_nmsub(v7,v5,(vector float)vconst2),v7,v7), vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = (vector float)vec_sr((vector unsigned int)v3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_ctf(vec_sub((vector signed int)v8,vconst3),0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz2 = vec_madd(vz,vz,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vlog,vconst4,vhalf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vz2,vc,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vz2,v1,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vz,v2,vlog);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return vlog;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ L3psycho_anal. Compute psycho acoustics.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -253,6 +299,11 @@ static const FLOAT ma_max_i1 = 3.6517412725483771;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const FLOAT ma_max_i2 = 31.622776601683793;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* pow(10, (MLIMIT) / 10.0); */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const FLOAT ma_max_m = 31.622776601683793;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const vector float vmamax1 = (vector float)VINIT4ALL(3.651741);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const vector float vmamax2 = (vector float)VINIT4ALL(31.622777);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*This is the masking table:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ According to tonality, values are going from 0dB (TMN)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -666,6 +717,14 @@ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2], int chn,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int gr_out, FLOAT fftenergy[HBLKSIZE], FLOAT(*wsamp_l)[BLKSIZE])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,vhalf,vprev,vzero,vsqrt2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = vec_ctf(vec_splat_s32(1),1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(0.7071067811865001);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm = (vector unsigned char)VINIT16(0,1,2,3,28,29,30,31,24,25,26,27,20,21,22,23);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -675,19 +734,80 @@ vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2]
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fft_long(gfc, *wsamp_l, chn, buffer);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else if (chn == 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* FFT data for mid and side channel is derived from L & R */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < BLKSIZE; j += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wsamp_l[0]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,wsamp_l[1]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(16,wsamp_l[0]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wsamp_l[1]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sub(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v5,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v6,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v7,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v8,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,wsamp_l[0]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,0,wsamp_l[1]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v11,16,wsamp_l[0]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v12,16,wsamp_l[1]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = BLKSIZE - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const l = wsamp_l[0][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const r = wsamp_l[1][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_l[0][j] = (l + r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_l[1][j] = (l - r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * compute energies
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vec_ld(0,(*wsamp_l));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < BLKSIZE/2; j += 16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,(*wsamp_l)+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,(*wsamp_l)+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,(*wsamp_l)+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(48,(*wsamp_l)+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(48,(*wsamp_l)+1008-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(32,(*wsamp_l)+1008-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(16,(*wsamp_l)+1008-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,(*wsamp_l)+1008-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(vprev,v5,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v5,v6,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v6,v7,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v7,v8,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(v1,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(v2,v2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(v3,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(v4,v4,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(v9,v9,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_madd(v10,v10,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(v11,v11,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v12,v12,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v5,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v6,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v7,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v8,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,16,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v11,32,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v12,48,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vprev,vprev,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v1,0,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[0] = wsamp_l[0][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[0] *= fftenergy[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -696,13 +816,51 @@ vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2]
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const im = (*wsamp_l)[BLKSIZE / 2 + j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[BLKSIZE / 2 - j] = (re * re + im * im) * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* total energy */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(0,fftenergy+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0,fftenergy+508);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(0,fftenergy+512);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_xor(v8,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_sld(v5,v8,12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_sld(v8,v7,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,fftenergy+11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0,fftenergy+508);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_lde(0,fftenergy+512);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_xor(v8,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=12;j<508;j+=16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(48,fftenergy+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v1,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_add(v2,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v3,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_add(v4,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v5,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sld(v5,v5,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_sld(v5,v5,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sld(v5,v5,12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v5,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v5,vec_lvsr(0, psv->tot_ener+chn));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v5,0,psv->tot_ener+chn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT totalenergy = 0.0f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = 11; j < HBLKSIZE; j++)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ totalenergy += fftenergy[j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ psv->tot_ener[chn] = totalenergy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (plt) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -716,27 +874,96 @@ vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2]
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ vbrpsy_compute_fft_s(lame_internal_flags const *gfc, const sample_t * const buffer[2], int chn,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- int sblock, FLOAT(*fftenergy_s)[HBLKSIZE_s], FLOAT(*wsamp_s)[3][BLKSIZE_s])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int sblock, FLOAT(*fftenergy_s)[HBLKSIZE_s+3], FLOAT(*wsamp_s)[3][BLKSIZE_s])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,vhalf,vprev,vzero,vsqrt2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = vec_ctf(vec_splat_s32(1),1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(0.7071067811865001);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm = (vector unsigned char)VINIT16(0,1,2,3,28,29,30,31,24,25,26,27,20,21,22,23);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (sblock == 0 && chn < 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fft_short(gfc, *wsamp_s, chn, buffer);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (chn == 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* FFT data for mid and side channel is derived from L & R */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < BLKSIZE_s; j += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,wsamp_s[0][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,wsamp_s[1][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(16,wsamp_s[0][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,wsamp_s[1][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_add(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sub(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v5,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v6,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v7,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v8,vsqrt2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,wsamp_s[0][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,0,wsamp_s[1][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v11,16,wsamp_s[0][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v12,16,wsamp_s[1][sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = BLKSIZE_s - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const l = wsamp_s[0][sblock][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const r = wsamp_s[1][sblock][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_s[0][sblock][j] = (l + r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_s[1][sblock][j] = (l - r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * compute energies
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vec_ld(0,(*wsamp_s)[sblock]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < BLKSIZE_s/2; j += 16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,(*wsamp_s)[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,(*wsamp_s)[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,(*wsamp_s)[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(48,(*wsamp_s)[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(48,(*wsamp_s)[sblock]+240-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(32,(*wsamp_s)[sblock]+240-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(16,(*wsamp_s)[sblock]+240-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(0,(*wsamp_s)[sblock]+240-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(vprev,v5,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v5,v6,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v6,v7,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v7,v8,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(v1,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(v2,v2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(v3,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(v4,v4,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_madd(v9,v9,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_madd(v10,v10,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_madd(v11,v11,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v12,v12,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v5,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v6,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v7,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v8,vhalf,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v9,0,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,16,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v11,32,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v12,48,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vprev,vprev,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v1,0,fftenergy_s[sblock]+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy_s[sblock][0] = (*wsamp_s)[sblock][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy_s[sblock][0] *= fftenergy_s[sblock][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = BLKSIZE_s / 2 - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -744,6 +971,7 @@ vbrpsy_compute_fft_s(lame_internal_flags const *gfc, const sample_t * const buff
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const im = (*wsamp_s)[sblock][BLKSIZE_s / 2 + j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy_s[sblock][BLKSIZE_s / 2 - j] = (re * re + im * im) * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -772,7 +1000,24 @@ vbrpsy_attack_detection(lame_internal_flags * gfc, const sample_t * const buffer
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT energy[4], FLOAT sub_short_factor[4][3], int ns_attacks[4][4],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int uselongblock[2])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT ns_hpfsmpl[2][576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vsum,vsum1,vsum2,vsuma,vsumb,vsumc,vsumd,vmaska,vmaskb,vmaskc,vmaskd;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask1,vmask2,vmask3,vmask4,vmask1inv,vmask2inv,vmask3inv,vmask4inv,vperm,vs4,vs8,vs12;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm = (vector unsigned char)VINIT16(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector float)vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector float)vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs4 = vec_sl((vector unsigned char)v1,(vector unsigned char)v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs8 = vec_sl(vs4,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs12 = vec_or(vs4,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector float)vec_splat_s32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmaska = vec_slo(v3,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmaskb = vec_sro(vmaska,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmaskc = vec_sro(vmaska,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmaskd = vec_sro(vmaska,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT ns_hpfsmpl[2][576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *const psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -785,14 +1030,142 @@ vbrpsy_attack_detection(lame_internal_flags * gfc, const sample_t * const buffer
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Don't copy the input buffer into a temporary buffer */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* unroll the loop 2 times */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (chn = 0; chn < n_chn_out; chn++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- static const FLOAT fircoef[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ static const FLOAT fircoef[] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- -5.52212e-17 * 2, -0.313819 * 2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -5.52212e-17 * 2, -0.313819 * 2, 0.0, 0.0
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* apply high pass filter of fs/4 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const sample_t *const firbuf = &buffer[chn][576 - 350 - NSFIRLEN + 192];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- assert(dimension_of(fircoef) == ((NSFIRLEN - 1) / 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //assert(dimension_of(fircoef) == ((NSFIRLEN - 1) / 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0, firbuf+10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_lvsl(0, firbuf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_lvsl(0, firbuf+1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask3 = vec_lvsl(0, firbuf+2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask4 = vec_lvsl(0, firbuf+3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1inv = vec_perm(vmask1,vmask1,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2inv = vec_perm(vmask2,vmask2,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask3inv = vec_perm(vmask3,vmask3,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask4inv = vec_perm(vmask4,vmask4,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(i=0;i<576;) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,firbuf+i+10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(v1, v2, vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat(vsum1, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_and(vsum2, vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,firbuf+i+NSFIRLEN-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=0;j<(NSFIRLEN-1)/2;j+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16, firbuf+i+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0, firbuf+i+NSFIRLEN-3-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v5,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v4,vmask3inv);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,fircoef+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_madd(v10,v11,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_slo(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_slo(vsum,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_slo(vsum,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vsum,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_add(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsuma = vec_add(v15,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsuma = vec_and(vsuma,vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat(vsum1, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_and(vsum2, vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,firbuf+i+NSFIRLEN-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_lvsl(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=0;j<(NSFIRLEN-1)/2;j+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16, firbuf+i+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0, firbuf+i+NSFIRLEN-3-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v5,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v4,vmask4inv);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,fircoef+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_madd(v10,v11,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sro(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_slo(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_slo(vsum,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vsum,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_add(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumb = vec_add(v15,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumb = vec_and(vsumb,vmaskb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat(vsum1, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_and(vsum2, vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,firbuf+i+NSFIRLEN-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_lvsl(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=0;j<(NSFIRLEN-1)/2;j+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16, firbuf+i+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0, firbuf+i+NSFIRLEN-3-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v5,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v4,vmask1inv);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,fircoef+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_madd(v10,v11,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sro(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sro(vsum,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_slo(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vsum,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_add(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumc = vec_add(v15,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumc = vec_and(vsumc,vmaskc);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat(vsum1, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_and(vsum2, vmaska);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(16,firbuf+i+NSFIRLEN-3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_lvsl(0, firbuf+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j=0;j<(NSFIRLEN-1)/2;j+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(16, firbuf+i+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(0, firbuf+i+NSFIRLEN-3-j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v5,vmask4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v4,vmask2inv);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_ld(0,fircoef+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_add(v7,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_madd(v10,v11,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sro(vsum,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sro(vsum,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_sro(vsum,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_add(vsum,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_add(v13,v14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumd = vec_add(v15,v16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsumd = vec_and(vsumd,vmaskd);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_or(vsuma,vsumb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_or(vsumc,vsumd);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_or(vsum1,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vsum,0,ns_hpfsmpl[chn]+i-4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 0; i < 576; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT sum1, sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ sum1 = firbuf[i + 10];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -803,6 +1176,7 @@ vbrpsy_attack_detection(lame_internal_flags * gfc, const sample_t * const buffer
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ns_hpfsmpl[chn][i] = sum1 + sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ masking_ratio[gr_out][chn].en = psv->en[chn];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ masking_ratio[gr_out][chn].thm = psv->thm[chn];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (n_chn_psy > 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -841,9 +1215,28 @@ vbrpsy_attack_detection(lame_internal_flags * gfc, const sample_t * const buffer
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 0; i < 9; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const *const pfe = pf + 576 / 9;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT p = 1.;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT vmax[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector float)vec_splat_s32(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ctf((vector signed int)v1,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; pf < pfe; pf+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,pf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_abs(v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_max(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_slo(v2,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_slo(v2,vs8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_slo(v2,vs12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_max(v2,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_max(v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 =vec_max(v8,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v10,0,vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ p = vmax[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (; pf < pfe; pf++)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (p < fabs(*pf))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ p = fabs(*pf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ psv->last_en_subshort[chn][i] = en_subshort[i + 3] = p;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ en_short[1 + i / 3] += p;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (p > en_subshort[i + 3 - 2]) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1039,7 +1432,7 @@ vbrpsy_calc_mask_index_s(lame_internal_flags const *gfc, FLOAT const *max,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-vbrpsy_compute_masking_s(lame_internal_flags * gfc, const FLOAT(*fftenergy_s)[HBLKSIZE_s],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++vbrpsy_compute_masking_s(lame_internal_flags * gfc, const FLOAT(*fftenergy_s)[HBLKSIZE_s+3],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT * eb, FLOAT * thr, int chn, int sblock)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *const psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1147,24 +1540,286 @@ vbrpsy_compute_masking_l(lame_internal_flags * gfc, const FLOAT fftenergy[HBLKSI
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *const psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT max[CBANDS], avg[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- unsigned char mask_idx_l[CBANDS + 2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT max[CBANDS] __attribute__ ((aligned (16))), avg[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned char mask_idx_l[CBANDS + 2] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int k, b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float tmp[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char v31 = (vector unsigned char)VINIT16ALL(31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vmask1 = (vector unsigned int)VINIT4ALL(0xff);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector signed int vone = (vector signed int)VINIT4ALL(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtab1 = (vector unsigned int)VINIT4(0x3f800000,0x3f4b5936,0x3f218698,0x3f218698);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtab2 = (vector unsigned int)VINIT4(0x3f218698,0x3f218698,0x3f218698,0x3e809bfa);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtab3 = (vector unsigned int)VINIT4(0x3df09e99,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtable1 = (vector unsigned int)VINIT4(0x3fe39e89,0x3fec53e5,0x3ff55ea7,0x3ff9149b);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtable2 = (vector unsigned int)VINIT4(0x3ffcd90e,0x3fea8f7b,0x3fd997da,0x3fbf84e2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned int vtable3 = (vector unsigned int)VINIT4(0x3fa8917c,0x3f800000,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Calculate the energy and the tonality of each partition.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ calc_energy(gdl, fftenergy, eb_l, max, avg);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ calc_mask_index_l(gfc, max, avg, mask_idx_l);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char vmaskidx1 = vec_ld(0,mask_idx_l); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char vmaskidx2 = vec_ld(16,mask_idx_l);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char vmaskidx3 = vec_ld(32,mask_idx_l);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector unsigned char vmaskidx4 = vec_ld(48,mask_idx_l);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = gfc->sv_qnt.masking_lower;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vmasking_lower_coeff = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmasking_lower_coeff = vec_splat(vmasking_lower_coeff,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * convolve the partitioned energy and unpredictability
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * with the spreading function, s3_l[b][k]
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ k = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- for (b = 0; b < gdl->npart; b++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (b = 0; b < gdl->npart-3; b+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v1,v2,v3,v4,v5,vkk,vkk2,vlast,vdd,vdd_n,vk,vk2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vf1,vf2,vf3,vf4,vecb,vx,veb,vavgmask,vmasking_lower;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmasking_lower = vec_ld(0,gdl->masking_lower+b);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmasking_lower = vec_madd(vmasking_lower,vmasking_lower_coeff,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int tmp2[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int tmp3[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,gdl->s3ind[b]); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,gdl->s3ind[b+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(0,gdl->s3ind[b+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(0,gdl->s3ind[b+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_mergeh(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_mergeh(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkk = vec_mergeh(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlast = vec_mergel(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sub(vlast,vkk);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sel(v1,(vector signed int)vzero,vec_cmpgt((vector signed int)vzero,v1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v1,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[0] = k;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[1] = k+tmp2[0]+1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[2] = k+tmp2[0]+tmp2[1]+2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[3] = k+tmp2[0]+tmp2[1]+tmp2[2]+3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ k = k+tmp2[0]+tmp2[1]+tmp2[2]+tmp2[3]+4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vk = vec_ld(0,tmp3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_perm(vmaskidx1,vmaskidx2,(vector unsigned char)vkk);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_perm(vmaskidx3,vmaskidx4,(vector unsigned char)vkk);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_sel(v1,v2,vec_cmpgt(vkk,(vector signed int)VINIT4ALL(31)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_and(vdd,(vector signed int)vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd_n = vone;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = gdl->s3[tmp3[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[1] = gdl->s3[tmp3[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[2] = gdl->s3[tmp3[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[3] = gdl->s3[tmp3[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vkk,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = eb_l[tmp2[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[1] = eb_l[tmp2[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[2] = eb_l[tmp2[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[3] = eb_l[tmp2[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ veb = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vecb = vec_madd(vf1,veb,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(vdd,vec_splat_u32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(v1,vec_splat_s32(1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_add(v1,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(-8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sl(v2,vec_splat_u32(-16));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sl(v3,vec_splat_u32(8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_or(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = (vector float)vec_perm(vtab1,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = (vector float)vec_perm(vtab3,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_sel(vf1,vf2,(vector unsigned int)vec_cmpgt((vector unsigned char)v1,v31));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vecb = vec_madd(vecb,vf2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkk = vec_add(vkk,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vk = vec_add(vk,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while(vec_any_le(vkk,vlast)) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkk2 = vec_sel(vkk,vlast,vec_cmpgt(vkk,vlast));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vk2 = vec_sel(vk,(vector signed int)vzero,vec_cmpgt(vkk,vlast));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_perm(vmaskidx1,vmaskidx2,(vector unsigned char)vkk2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_perm(vmaskidx3,vmaskidx4,(vector unsigned char)vkk2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sel(v1,v2,vec_cmpgt(vkk2,(vector signed int)VINIT4ALL(31)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_and(v1,(vector signed int)vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_cmpgt(vkk,vlast);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_nor(v2,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_and(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_and(vone,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_add(vdd,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd_n = vec_add(vdd_n,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vk2,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = gdl->s3[tmp2[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[1] = gdl->s3[tmp2[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[2] = gdl->s3[tmp2[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[3] = gdl->s3[tmp2[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vkk,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[0] = eb_l[tmp2[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[1] = eb_l[tmp2[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[2] = eb_l[tmp2[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp[3] = eb_l[tmp2[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ veb = vec_ld(0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_madd(vf1,veb,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v5,vec_splat_u32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(v1,vec_splat_s32(1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_add(v1,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(-8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sl(v2,vec_splat_u32(-16));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sl(v3,vec_splat_u32(8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_or(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = (vector float)vec_perm(vtab1,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = (vector float)vec_perm(vtab3,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_sel(vf1,vf2,(vector unsigned int)vec_cmpgt((vector unsigned char)v1,v31));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_madd(vx,vf2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vratio,vout,vf5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_sel(vecb,vzero,vec_cmplt(vecb,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_sel(vx,vzero,vec_cmplt(vx,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf1,vf2,vec_cmpgt(vf2,vf1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf4 = vec_sel(vf2,vf1,vec_cmpgt(vf2,vf1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = vec_re(vf4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vratio = vec_madd(vf3,vec_madd(vec_nmsub(vf4,vf5,(vector float)VINIT4ALL(1.0)),vf5,vf5),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2[0] = b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2[1] = b+1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2[2] = b+2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2[3] = b+3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[0] = mask_add_delta(mask_idx_l[b]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[1] = mask_add_delta(mask_idx_l[b+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[2] = mask_add_delta(mask_idx_l[b+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3[3] = mask_add_delta(mask_idx_l[b+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sub(vkk2,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(0,tmp3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_abs(v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = (vector signed int)vec_cmpgt(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector signed int)vec_cmpge(vratio,vmamax1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf4 = vec_add(vf1,vf2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(vec_any_eq(vec_or(v5,v3),(vector signed int)vzero)) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = fast_log10_altivec_2(vratio);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_cts(vf3,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(v1,vec_splat_s32(1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_add(v1,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(-8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sl(v2,vec_splat_u32(-16));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sl(v3,vec_splat_u32(8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_or(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = (vector float)vec_perm(vtable1,vtable2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = (vector float)vec_perm(vtable3,vtable2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = vec_sel(vf3,vf5,(vector unsigned int)vec_cmpgt((vector unsigned char)v1,v31));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = vec_madd(vf4,vf5,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf5 = vec_sel(vf5,vf4,vec_cmpge(vratio,vmamax1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ else vf5 = vf4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vf1,vf2,vec_cmpgt(vf2,vf1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vout,vf4,vec_cmpgt(vmamax2,vratio));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vf5,vout,(vector unsigned int)v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vout,vecb,(vector unsigned int)vec_cmple(vx,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vout = vec_sel(vout,vx,(vector unsigned int)vec_cmple(vecb,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vecb = vec_sel(vout,vecb,vec_cmpgt(vkk,vlast));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vkk = vec_add(vkk,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vk = vec_add(vk,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_sl(vdd,(vector unsigned int)vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd_n = vec_sl(vdd_n,(vector unsigned int)vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_add(vdd,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ctf(vdd,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_ctf(vdd_n,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_re(vf2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_madd(vf1,vf2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vdd = vec_cts(vf1,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(vdd,vec_splat_u32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(v1,vec_splat_s32(1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_add(v1,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vec_splat_s32(2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sl(v1,vec_splat_u32(-8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sl(v2,vec_splat_u32(-16));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sl(v3,vec_splat_u32(8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_or(v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_or(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = (vector float)vec_perm(vtab1,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = (vector float)vec_perm(vtab3,vtab2,(vector unsigned char)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_sel(vf1,vf2,(vector unsigned int)vec_cmpgt((vector unsigned char)v1,v31));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_ctf(vone,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vavgmask = vec_madd(vf1,vf2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vecb = vec_madd(vecb,vavgmask,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf4 = vec_ld(0,eb_l+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (psv->blocktype_old[chn & 0x01] == SHORT_TYPE) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,psv->nb_l1[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_madd(vf1,(vector float)VINIT4ALL(rpelev),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_madd(vf4,(vector float)VINIT4ALL(NS_PREECHO_ATT2),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf2,vf3,vec_cmpgt(vf3,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_min(vecb,vf3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //vec_st(vf3,0,thr+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,psv->nb_l1[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_ld(0,psv->nb_l2[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_madd(vf1,(vector float)VINIT4ALL(rpelev),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_madd(vf2,(vector float)VINIT4ALL(rpelev2),vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vzero,vf3,vec_cmpgt(vf3,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vec_sel(vzero,vf2,vec_cmpgt(vf2,vzero));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (psv->blocktype_old[chn & 0x01] == NORM_TYPE) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_min(vf3,vf2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_min(vecb,vf3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //vec_st(vf3,0,thr+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vf1,0,psv->nb_l2[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vecb,0,psv->nb_l1[chn]+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_ld(0,max+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_ld(0,gdl->minval+b);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_madd(vx,vf1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vec_madd(vx,vavgmask,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf3,vx,vec_cmpgt(vf3,vx));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //vec_st(vf3,0,thr+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_cmpgt(vmasking_lower,(vector float)VINIT4ALL(1.0f));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_madd(vf3,vmasking_lower,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf3,vf1,(vector unsigned int)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf3,vf4,vec_cmpgt(vf3,vf4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vec_madd(vf3,vmasking_lower,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vec_sel(vf1,vf3,(vector unsigned int)v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vf3,0,thr+b); //needs to be aligned
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ b=0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; b < gdl->npart; b++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT x, ecb, avg_mask, t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const masking_lower = gdl->masking_lower[b] * gfc->sv_qnt.masking_lower;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //fprintf(stderr,"%f\n",masking_lower);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* convolve the partitioned energy with the spreading function */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int kk = gdl->s3ind[b][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int const last = gdl->s3ind[b][1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1423,11 +2078,11 @@ L3psycho_anal_vbr(lame_internal_flags * gfc,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* fft and energy calculation */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT(*wsamp_l)[BLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT(*wsamp_s)[3][BLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT fftenergy[HBLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT fftenergy_s[3][HBLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT wsamp_L[2][BLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT wsamp_S[2][3][BLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT eb[4][CBANDS], thr[4][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT fftenergy[HBLKSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT fftenergy_s[3][HBLKSIZE_s+3] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT wsamp_L[2][BLKSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT wsamp_S[2][3][BLKSIZE_s] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT eb[4][CBANDS] __attribute__ ((aligned (16))), thr[4][CBANDS] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT sub_short_factor[4][3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT thmm;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1436,7 +2091,7 @@ L3psycho_anal_vbr(lame_internal_flags * gfc,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ (cfg->msfix > 0.f) ? (cfg->ATH_offset_factor * gfc->ATH->adjust_factor) : 1.f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const FLOAT(*const_eb)[CBANDS] = (const FLOAT(*)[CBANDS]) eb;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- const FLOAT(*const_fftenergy_s)[HBLKSIZE_s] = (const FLOAT(*)[HBLKSIZE_s]) fftenergy_s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const FLOAT(*const_fftenergy_s)[HBLKSIZE_s+3] = (const FLOAT(*)[HBLKSIZE_s+3]) fftenergy_s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* block type */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int ns_attacks[4][4] = { {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1824,7 +2479,7 @@ compute_bark_values(PsyConst_CB2SB_t const *gd, FLOAT sfreq, int fft_size,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-init_s3_values(FLOAT ** p, int (*s3ind)[2], int npart,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++init_s3_values(FLOAT ** p, int (*s3ind)[4], int npart,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const *bval, FLOAT const *bval_width, FLOAT const *norm)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT s3[CBANDS][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/quantize.c libmp3lame/quantize.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 9ba9c16..6064b29 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/quantize.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/quantize.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -28,6 +28,10 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "encoder.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -42,7 +46,32 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline double ppc_sqrt(double x) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ double y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ asm("fsqrt %0,%1" : "=f" (y) : "f" (x));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline double __frsqrte(double number)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ double y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ asm("frsqrte %0,%1" : "=f" (y) : "f" (number));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline double ppc_sqrt(double x) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ double y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const double halfx = 0.5 * x;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y = __frsqrte(x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y *= 1.5 - halfx * y * y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y *= 1.5 - halfx * y * y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y *= 1.5 - halfx * y * y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //y *= 1.5 - halfx * y * y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y *= x;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return (x == 0.0) ? 0 : y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* convert from L/R <-> Mid/Side */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -72,9 +101,162 @@ ms_convert(III_side_info_t * l3_side, int gr)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ init_xrpow_core_c(gr_info * const cod_info, FLOAT xrpow[576], int upper, FLOAT * sum)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vsum,vsum2,vsum3,vsum4,vmax,vmax2,vmax3,vmax4,vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vc1,vc2,vc3,vc4,vc5,vperm;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vconst1 = (vector float)VINIT4ALL(0.25);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vconst2 = (vector float)VINIT4ALL(1.25);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *sum = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_sl(vc1,vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_sl(vc3,vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_or(vc3,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_xor(vsum,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_xor(vmax,vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_xor(vsum2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax2 = vec_xor(vmax2,vmax2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_xor(vsum3,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax3 = vec_xor(vmax3,vmax3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum4 = vec_xor(vsum4,vsum4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax4 = vec_xor(vmax4,vmax4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,(cod_info->xr));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm = vec_lvsl(0,(cod_info->xr));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 0; i <= upper-15; i+=16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,(cod_info->xr)+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,(cod_info->xr)+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,(cod_info->xr)+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,(cod_info->xr)+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v0,v1,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v1,v2,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v2,v3,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v3,v4,vperm);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_abs(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_abs(v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_abs(v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_abs(v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_add(vsum,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_add(vsum2,v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_add(vsum3,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum4 = vec_add(vsum4,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_re(vec_rsqrte(vec_rsqrte(v9)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_re(vec_rsqrte(vec_rsqrte(v10)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_re(vec_rsqrte(vec_rsqrte(v11)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_re(vec_rsqrte(vec_rsqrte(v12)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = (vector float)vec_cmpeq(vzero,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = (vector float)vec_cmpeq(vzero,v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = (vector float)vec_cmpeq(vzero,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = (vector float)vec_cmpeq(vzero,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_madd(v1,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_madd(v2,v2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_madd(v3,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_madd(v4,v4,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_madd(v13,v13,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_madd(v14,v14,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_madd(v15,v15,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_madd(v16,v16,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v17 = vec_madd(v9,vconst1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v18 = vec_madd(v10,vconst1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v19 = vec_madd(v11,vconst1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v20 = vec_madd(v12,vconst1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_nmsub(v13,v17,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_nmsub(v14,v18,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_nmsub(v15,v19,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_nmsub(v16,v20,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(v13,v1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(v14,v2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_madd(v15,v3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(v16,v4,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sel(v1,vzero,(vector unsigned int)v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_sel(v2,vzero,(vector unsigned int)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sel(v3,vzero,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_sel(v4,vzero,(vector unsigned int)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v17 = vec_madd(v1,v9,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v18 = vec_madd(v2,v10,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v19 = vec_madd(v3,v11,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v20 = vec_madd(v4,v12,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v17,0,xrpow+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v18,16,xrpow+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v19,32,xrpow+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v20,48,xrpow+i);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(v17,vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax2 = vec_max(v18,vmax2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax3 = vec_max(v19,vmax3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax4 = vec_max(v20,vmax4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(vmax,vmax2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax3 = vec_max(vmax3,vmax4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(vmax,vmax3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_add(vsum,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_add(vsum3,vsum4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_add(vsum,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_slo(vmax,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_slo(vsum,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_max(v1,vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_slo(v3,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_slo(v4,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(v3,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_add(v4,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_perm(vmax,vmax,vec_lvsr(0,&(cod_info->xrpow_max)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_perm(vsum,vsum,vec_lvsr(0,sum));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vmax,0,&(cod_info->xrpow_max));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum,0,sum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; i <= upper; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp = fabs(cod_info->xr[i]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i] = sqrt(tmp * sqrt(tmp));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if(1) // will work on G3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT tmp2,tmp3,tmp4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 0; i <= upper-3; i+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp = fabs (cod_info->xr[i]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp2 = fabs (cod_info->xr[i+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp3 = fabs (cod_info->xr[i+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp4 = fabs (cod_info->xr[i+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i] = ppc_sqrt (tmp * ppc_sqrt(tmp));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i+1] = ppc_sqrt (tmp2 * ppc_sqrt(tmp2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i+2] = ppc_sqrt (tmp3 * ppc_sqrt(tmp3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i+3] = ppc_sqrt (tmp4 * ppc_sqrt(tmp4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i+1] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i+1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i+2] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i+2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i+3] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i+3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; i <= upper; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ tmp = fabs(cod_info->xr[i]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum += tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xrpow[i] = ppc_sqrt(tmp * ppc_sqrt(tmp));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (xrpow[i] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = xrpow[i];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 0; i <= upper; ++i) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ tmp = fabs(cod_info->xr[i]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *sum += tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -83,6 +265,8 @@ init_xrpow_core_c(gr_info * const cod_info, FLOAT xrpow[576], int upper, FLOAT *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (xrpow[i] > cod_info->xrpow_max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ cod_info->xrpow_max = xrpow[i];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1495,7 +1679,7 @@ VBR_old_iteration_loop(lame_internal_flags * gfc, const FLOAT pe[2][2],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[2][2][SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int bands[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int frameBits[15];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int used_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1650,7 +1834,7 @@ VBR_new_iteration_loop(lame_internal_flags * gfc, const FLOAT pe[2][2],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[2][2][SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[2][2][576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[2][2][576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int frameBits[15];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int used_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int max_bits[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1904,7 +2088,7 @@ ABR_iteration_loop(lame_internal_flags * gfc, const FLOAT pe[2][2],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int targ_bits[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mean_bits, max_frame_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int ch, gr, ath_over;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1991,7 +2175,7 @@ CBR_iteration_loop(lame_internal_flags * gfc, const FLOAT pe[2][2],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int targ_bits[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mean_bits, max_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int gr, ch;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/quantize_pvt.c libmp3lame/quantize_pvt.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index d8d6447..0a346f9 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/quantize_pvt.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/quantize_pvt.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -27,6 +27,11 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -751,6 +756,39 @@ calc_xmin(lame_internal_flags const *gfc,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static FLOAT
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ calc_noise_core_c(const gr_info * const cod_info, int *startline, int l, FLOAT step)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,va,vb,vstep,vzero,vnoise1,vnoise2,vix01;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm1,vperm2,vperm5,vperm6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vx7,vshamt,vone;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vmask1,vmask2,vmask3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v10,v11,v12,v13,v14,v15,v16,v17;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vc1,vc2,vc3,vc4,vc5,vc6,vperm3,vperm4,vmask;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float temp[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp[0] = step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vstep = vec_ld(0,temp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm6 = (vector unsigned char)VINIT16(0,0,3,19,0,0,7,23,0,0,11,27,0,0,15,31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm5 = vec_sld(vperm6,vperm6,2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_splat_u32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_sld((vector unsigned int)vzero,vmask1,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask3 = vec_sld((vector unsigned int)vzero,vmask1,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_sld((vector unsigned int)vzero,vmask1,12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm3 = (vector unsigned char)VINIT16(0,0,0,0,0,0,0,0,0,1,2,3,16,17,18,19);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_sld(vperm3,(vector unsigned char)vzero,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = (vector unsigned char)VINIT16ALL(16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vstep = vec_splat(vstep,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_xor(vnoise1,vnoise1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise2 = vec_xor(vnoise2,vnoise2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vone = vec_splat_s32(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_splat_s32(2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT noise = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int j = *startline;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const int *const ix = cod_info->l3_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -767,9 +805,55 @@ calc_noise_core_c(const gr_info * const cod_info, int *startline, int l, FLOAT s
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else if (j > cod_info->big_values) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT ix01[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT ix01[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix01[0] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix01[1] = step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vix01 = vec_ld(0,ix01);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = vec_lvsl(0,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_ld(0,ix+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = vec_lvsl(0,ix+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>1;l-=2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_ld(16,ix+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vx1,vx2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_abs(v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vx2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_sl(vx3,(vector unsigned int)vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = vec_add(vx4,vone);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = vec_add(vx4,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx7 = vec_add(vx5,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_perm(vx4,vx5,vperm5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vx6,vx7,vperm6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_or(vx2,vx3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(vix01,vix01,(vector unsigned char)vx4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_sub(va,v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_madd(va,va,vnoise1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sld(vnoise1,vnoise1,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(vnoise1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sld(v2,v2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v4,v4,vec_lvsr(0,&noise));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v5,0,&noise);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(l) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - ix01[ix[j]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - ix01[ix[j]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (l--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ temp = fabs(cod_info->xr[j]) - ix01[ix[j]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -779,8 +863,138 @@ calc_noise_core_c(const gr_info * const cod_info, int *startline, int l, FLOAT s
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = vec_lvsl(0,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>3;l-=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v2,v3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_abs(v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_abs(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v2,v2,vec_lvsl(0,pow43+ix[j]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v6,v6,vec_lvsl(-4,pow43+ix[j+1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v10,v10,vec_lvsl(-8,pow43+ix[j+2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v14,v14,vec_lvsl(-12,pow43+ix[j+3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_sel(v4,v8,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_sel(v4,v12,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_sel(v4,v16,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_nmsub(v4,vstep,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_lvsl(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_lvsl(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_lvsl(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_lvsl(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_or(vc2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_or(vc4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_perm(vc1,vc2,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc6 = vec_perm(vc3,vc4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v3,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v4,v5,vc6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sld(v6,v7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_nmsub(v8,vstep,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j+=4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v3,v3,vec_lvsl(0,pow43+ix[j]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v7,v7,vec_lvsl(-4,pow43+ix[j+1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v11,v11,vec_lvsl(-8,pow43+ix[j+2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v17 = vec_perm(v15,v15,vec_lvsl(-12,pow43+ix[j+3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_sel(v5,v9,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_sel(v5,v13,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_sel(v5,v17,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_nmsub(v5,vstep,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_lvsl(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_lvsl(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_lvsl(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_lvsl(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_or(vc2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_or(vc4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_perm(vc1,vc2,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc6 = vec_perm(vc3,vc4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v3,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v4,v5,vc6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_sld(v6,v7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_nmsub(v8,vstep,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_madd(va,va,vnoise1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise2 = vec_madd(vb,vb,vnoise2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j+=4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_add(vnoise1,vnoise2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>1;l-=2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,cod_info->xr+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_abs(v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+ix[j]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+ix[j+1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+ix[j+2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+ix[j+3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v2,vec_lvsl(0,pow43+ix[j]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v3,vec_lvsl(-4,pow43+ix[j+1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v4,v4,vec_lvsl(-8,pow43+ix[j+2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v5,v5,vec_lvsl(-12,pow43+ix[j+3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sel(v6,v7,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sel(v6,v8,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sel(v6,v9,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_nmsub(v6,vstep,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise1 = vec_madd(va,va,vnoise1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_sld(vnoise1,vnoise1,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_add(vnoise1,v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_sld(v2,v2,4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_add(v2,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v4,v4,vec_lvsr(0,&noise));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(v5,0,&noise);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(l) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (l--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -790,6 +1004,7 @@ calc_noise_core_c(const gr_info * const cod_info, int *startline, int l, FLOAT s
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *startline = j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/tables.c libmp3lame/tables.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index a023099..124a87c 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/tables.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/tables.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -406,7 +406,7 @@ const uint8_t t33l[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-const struct huffcodetab ht[HTN] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++const struct huffcodetab ht[HTN] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* xlen, linmax, table, hlen */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0, 0, NULL, NULL},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {2, 0, t1HB, t1l},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/takehiro.c libmp3lame/takehiro.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 67aba1b..368ff2e 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/takehiro.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/takehiro.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -26,6 +26,10 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -222,6 +226,150 @@ quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT * xp, int *pi)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT * xr, int *ix)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,va,vb,vistep,vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vprev;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm1,vperm2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float const1 = (vector float)VINIT4(0.4053964553387788,3.404263724373839,5.465086767819913,1.0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float const2 = (vector float)VINIT4(7.719205369637751,10.93017829043677,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#ifndef _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int temp[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float temp2[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp2[0] = istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vistep = vec_ld(0,temp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vistep = vec_splat(vistep,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ l = l >> 1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = vec_lvsl(0,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = vec_lvsr(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_ld(-16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vec_perm(vx1,vx2,vec_lvsl(0,ix));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>3;l-=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v2,v3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_madd(v4,vistep,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_madd(v5,vistep,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_floor(va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_floor(vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_splat(const1,2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_splat(const1,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_splat(const2,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_splat(const2,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v2,v4,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v3,v4,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v2,v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v3,v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_splat(const1,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_splat(const1,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v8,v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v9,v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v10,v2,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v11,v3,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_re(v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_nmsub(v10,v6,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_nmsub(v11,v7,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v10,v6,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v11,v7,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_madd(v8,v10,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = vec_madd(v9,v11,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_cts(va,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_cts(vb,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vprev,vx1,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_perm(vx1,vx2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx3,0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx4,16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vx2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vx1,vx2,vec_lvsl(0,ix));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_perm(vprev,vx3,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx4,0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>1;l-=2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT x0, x1, x2, x3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int rx0, rx1, rx2, rx3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x0 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x0, rx0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x1, rx1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x3 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x2, rx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x0 += QUANTFAC(rx0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x3, rx3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1 += QUANTFAC(rx1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x0, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2 += QUANTFAC(rx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x1, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x3 += QUANTFAC(rx3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x2, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x3, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(;l>1;l-=2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_madd(v4,vistep,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_cts(va,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st((vector unsigned int)vx1,0,temp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,adj43+temp[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,adj43+temp[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,adj43+temp[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,adj43+temp[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v2,vec_lvsl(0,adj43+temp[0]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v3,vec_lvsl(-4,adj43+temp[1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v4,v4,vec_lvsl(-8,adj43+temp[2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v5,v5,vec_lvsl(-12,adj43+temp[3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = vec_add(va,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_cts(va,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vprev,vx1,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx3,0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vprev = vx1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = vec_perm(vx1,vx2,vec_lvsl(0,ix));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = vec_perm(vprev,vx3,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vx4,0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (l) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT x0, x1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int rx0, rx1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x0 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1 = *xr++ * istep;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x0, rx0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x1, rx1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x0 += QUANTFAC(rx0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1 += QUANTFAC(rx1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x0, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ XRPOW_FTOI(x1, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int remaining;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ assert(l > 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -263,7 +411,7 @@ quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT * xr, int *ix)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ XRPOW_FTOI(x0, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ XRPOW_FTOI(x1, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -420,6 +568,60 @@ quantize_xrpow(const FLOAT * xp, int *pi, FLOAT istep, gr_info const *const cod_
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* ix_max */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*************************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ix_max_vec(const int *ix, const int *end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int vresult[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int max1=0, max2=0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v1, v2, v3, v4, v5, v6, v7, vmax;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vc1,vc2,vc3,vc4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(end - ix < 8) goto normal;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int i = (end-ix)/4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int remain = (end-ix)%4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_sl(vc1,vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_sl(vc3,vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0, ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0, ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_xor(vmax, vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while(i--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16, ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v1, v2, vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vec_max(vmax,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_slo(vmax,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_max(vmax,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_slo(v5,vc4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_max(v5,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(v7,0,vresult);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ max1 = vresult[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(!remain) return max1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //max2 = vresult[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*if(vresult[2] > max1) max1 = vresult[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(vresult[3] > max2) max2 = vresult[3];*/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ normal:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x2 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (max1 < x1) max1 = x1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (max2 < x2) max2 = x2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(max1 < max2) max1 = max2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return max1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix_max(const int *ix, const int *end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -438,14 +640,14 @@ ix_max(const int *ix, const int *end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ max1 = max2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return max1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__ALTIVEC__) || (defined(__ALTIVEC__) && !defined(_ARCH_PPC64))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ count_bit_ESC(const int *ix, const int *const end, int t1, const int t2, unsigned int *const s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -481,6 +683,7 @@ count_bit_ESC(const int *ix, const int *const end, int t1, const int t2, unsigne
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -507,6 +710,7 @@ static const int huf_tbl_noESC[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__ALTIVEC__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ count_bit_noESC_from2(const int *ix, const int *end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -533,6 +737,7 @@ count_bit_noESC_from2(const int *ix, const int *end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -572,6 +777,651 @@ count_bit_noESC_from3(const int *ix, const int *end, int max, unsigned int * s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_ESC_altivec(const int *ix, const int *const end, int t1, const int t2, int *const s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* ESC-table is used */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int const linbits = ht[t1].xlen * 65536 + ht[t2].xlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int sum = 0, sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vsum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vshamt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vzero,vs1,vs2,vs3,vs4,vs5,vs6,vlimit1,vlimit2,vone;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned char tmp[16] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int tmp2[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlimit1 = vec_splat_u8(14);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlimit2 = vec_splat_u8(15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vone = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_splat_u8(4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_xor(vsum,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_packs(v1,v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_packs(v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector signed int)vec_packs(v5,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector signed int)vec_packs(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = vec_packs((vector unsigned short)v1,(vector unsigned short)v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = vec_packs((vector unsigned short)v2,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = vec_sel(vs1,vlimit2,vec_cmpgt(vs1,vlimit1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs4 = vec_sel(vs2,vlimit2,vec_cmpgt(vs2,vlimit1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs5 = vec_sel(vzero,vone,vec_cmpgt(vs1,vlimit1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs6 = vec_sel(vzero,vone,vec_cmpgt(vs2,vlimit1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs5 = vec_add(vs5,vs6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vec_sum4s(vs5,vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = vec_sl(vs3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = vec_add(vs3,vs4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vs3,0,tmp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[4]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[5]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[6]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[7]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[8]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[9]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[10]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[11]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[12]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[13]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[14]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[tmp[15]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = (vector unsigned int)vec_sums((vector signed int)vsum,(vector signed int)vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vsum,0,tmp2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += tmp2[3] * linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int y = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (x >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (y >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x <<= 4u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x += y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++normal:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int y = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (x >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (y >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x <<= 4u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x += y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++end:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 = sum & 0xffffu;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum >>= 16u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum > sum2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t1 = t2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from2_altivec1(const int *ix, const int *end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words. The sum of table23 entries is treated as two packed 16-bit totals; the smaller one is added to *s and t1 is returned, incremented when the low-half total is the smaller. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int sum1, sum2; /* vector-path totals: sum1 joins the high half of sum, sum2 the low half */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int *table = table23;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vxlen,vzero,vsum1,vsum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vx;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vhlen1,vhlen2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed char vs1,vs2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* vhlen1 and vhlen2: byte lookup tables indexed by x0 * xlen + x1 (presumably the two halves of the table23 entries - TODO confirm against table23) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen1 = (vector unsigned char)VINIT16(1,4,7,4,5,7,6,7,8,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen2 = (vector unsigned char)VINIT16(2,3,7,4,4,7,6,7,8,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); /* words 0 and 2 of each source: the x0 of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); /* words 1 and 3 of each source: the x1 of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxlen = vec_splat_s32(3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_splat_s32(0); /* explicit zero instead of xor-ing an uninitialized vector with itself */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_splat_s32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat_s32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal; /* the vector loop consumes 32 ints per pass */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix); /* vec_ld ignores the low four address bits; data is realigned below */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix); /* permute control that realigns two adjacent aligned loads to ix */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask); /* v9..v16: the 32 ints starting at ix */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8; /* the last aligned block is the first block of the next pass */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1); /* de-interleave: v1,v3,v5,v7 hold x0 values; v2,v4,v6,v8 hold x1 values */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = (vector signed int)vec_mladd((vector unsigned short)v1,(vector unsigned short)vxlen,(vector unsigned short)v2); /* x0 * xlen + x1 in 16-bit lanes; assumes each value fits in the low half of its word - TODO confirm */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = (vector signed int)vec_mladd((vector unsigned short)v3,(vector unsigned short)vxlen,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = (vector signed int)vec_pack(vx1,vx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = (vector signed int)vec_mladd((vector unsigned short)v5,(vector unsigned short)vxlen,(vector unsigned short)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = (vector signed int)vec_mladd((vector unsigned short)v7,(vector unsigned short)vxlen,(vector unsigned short)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = (vector signed int)vec_pack(vx4,vx5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = (vector unsigned char)vec_pack((vector unsigned short)vx3,(vector unsigned short)vx6); /* 16 table indices, one per byte */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = (vector signed char)vec_perm(vhlen1,vhlen1,vx); /* byte-wise table lookup */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = (vector signed char)vec_perm(vhlen2,vhlen2,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sum4s(vs1,vsum1); /* add each group of four bytes into the matching word accumulator */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sum4s(vs2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sums(vsum1,vzero); /* horizontal add; the total lands in the last word */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sums(vsum2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(vsum1,vsum1,vec_lvsr(4,&sum1)); /* rotate the total into the lane that vec_ste stores for this address */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_perm(vsum2,vsum2,vec_lvsr(4,&sum2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum1,0,&sum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum2,0,&sum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) { /* scalar tail for the pairs the vector loop did not cover */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += table[ x0 * xlen + x1 ];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += sum & 0xffffu; /* merge the tail halves with the vector totals */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = (sum>>16u) + sum1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++normal: /* fewer than 32 ints: scalar path only */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += table[ x0 * xlen + x1 ];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 = sum & 0xffffu;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum >>= 16u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++end:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum > sum2) { /* keep the smaller total and step t1 to match */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t1++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from2_altivec2(const int *ix, const int *end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words. The sum of table56 entries is treated as two packed 16-bit totals; the smaller one is added to *s and t1 is returned, incremented when the low-half total is the smaller. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int sum1, sum2; /* vector-path totals: sum1 joins the high half of sum, sum2 the low half */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int *table = table56;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vxlen,vzero,vsum1,vsum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vx;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vhlen1,vhlen2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed char vs1,vs2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* vhlen1 and vhlen2: 16-entry byte lookup tables indexed by x0 * xlen + x1 (presumably the two halves of the table56 entries - TODO confirm against table56) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen1 = (vector unsigned char)VINIT16(1,4,7,8,4,5,8,9,7,8,9,10,8,8,9,10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen2 = (vector unsigned char)VINIT16(3,4,6,8,4,4,6,7,5,6,7,8,7,7,8,9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); /* words 0 and 2 of each source: the x0 of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); /* words 1 and 3 of each source: the x1 of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxlen = vec_splat_s32(4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_splat_s32(0); /* explicit zero instead of xor-ing an uninitialized vector with itself */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_splat_s32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat_s32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal; /* the vector loop consumes 32 ints per pass */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix); /* vec_ld ignores the low four address bits; data is realigned below */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix); /* permute control that realigns two adjacent aligned loads to ix */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask); /* v9..v16: the 32 ints starting at ix */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8; /* the last aligned block is the first block of the next pass */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1); /* de-interleave: v1,v3,v5,v7 hold x0 values; v2,v4,v6,v8 hold x1 values */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = (vector signed int)vec_mladd((vector unsigned short)v1,(vector unsigned short)vxlen,(vector unsigned short)v2); /* x0 * xlen + x1 in 16-bit lanes; assumes each value fits in the low half of its word - TODO confirm */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = (vector signed int)vec_mladd((vector unsigned short)v3,(vector unsigned short)vxlen,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = (vector signed int)vec_pack(vx1,vx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = (vector signed int)vec_mladd((vector unsigned short)v5,(vector unsigned short)vxlen,(vector unsigned short)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = (vector signed int)vec_mladd((vector unsigned short)v7,(vector unsigned short)vxlen,(vector unsigned short)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = (vector signed int)vec_pack(vx4,vx5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = (vector unsigned char)vec_pack((vector unsigned short)vx3,(vector unsigned short)vx6); /* 16 table indices, one per byte */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = (vector signed char)vec_perm(vhlen1,vhlen1,vx); /* byte-wise table lookup */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = (vector signed char)vec_perm(vhlen2,vhlen2,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sum4s(vs1,vsum1); /* add each group of four bytes into the matching word accumulator */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sum4s(vs2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sums(vsum1,vzero); /* horizontal add; the total lands in the last word */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sums(vsum2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(vsum1,vsum1,vec_lvsr(4,&sum1)); /* rotate the total into the lane that vec_ste stores for this address */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_perm(vsum2,vsum2,vec_lvsr(4,&sum2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum1,0,&sum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum2,0,&sum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) { /* scalar tail for the pairs the vector loop did not cover */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += table[ x0 * xlen + x1 ];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += sum & 0xffffu; /* merge the tail halves with the vector totals */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = (sum>>16u) + sum1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++normal: /* fewer than 32 ints: scalar path only */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += table[ x0 * xlen + x1 ];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 = sum & 0xffffu;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum >>= 16u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++end:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum > sum2) { /* keep the smaller total and step t1 to match */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t1++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from3_altivec1(const int *ix, const int *const end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum1 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum3 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen1 = ht[7].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen2 = ht[8].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen3 = ht[9].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vxlen,vzero,vsum1,vsum2,vsum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vx,v31;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vhlen11,vhlen12,vhlen13,vhlen21,vhlen22,vhlen23,vhlen31,vhlen32,vhlen33;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed char vs1,vs2,vs3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen11 = (vector unsigned char)VINIT16(1,4,7,9,9,10,4,6,8,9,9,10,7,7,9,10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen12 = (vector unsigned char)VINIT16(10,11,8,9,10,11,11,11,8,9,10,11,11,12,9,10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen13 = (vector unsigned char)VINIT16(11,12,12,12,0,0,0,0,0,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen21 = (vector unsigned char)VINIT16(2,4,7,9,9,10,4,4,6,10,10,10,7,6,8,10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen22 = (vector unsigned char)VINIT16(10,11,9,10,10,11,11,12,9,9,10,11,12,12,10,10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen23 = (vector unsigned char)VINIT16(11,11,13,13,0,0,0,0,0,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen31 = (vector unsigned char)VINIT16(3,4,6,7,9,10,4,5,6,7,8,10,5,6,7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen32 = (vector unsigned char)VINIT16(9,10,7,7,8,9,9,10,8,8,9,9,10,11,9,9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen33 = (vector unsigned char)VINIT16(10,10,11,11,0,0,0,0,0,0,0,0,0,0,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v31 = (vector unsigned char)VINIT16ALL(31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxlen = vec_splat_s32(6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_xor(vsum1,vsum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_xor(vsum2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_xor(vsum3,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //int *end2 = ix + 32*((int)(end - ix)/32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = (vector signed int)vec_mladd((vector unsigned short)v1,(vector unsigned short)vxlen,(vector unsigned short)v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = (vector signed int)vec_mladd((vector unsigned short)v3,(vector unsigned short)vxlen,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = (vector signed int)vec_pack(vx1,vx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = (vector signed int)vec_mladd((vector unsigned short)v5,(vector unsigned short)vxlen,(vector unsigned short)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = (vector signed int)vec_mladd((vector unsigned short)v7,(vector unsigned short)vxlen,(vector unsigned short)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = (vector signed int)vec_pack(vx4,vx5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = (vector unsigned char)vec_pack((vector unsigned short)vx3,(vector unsigned short)vx6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_perm(vhlen11,vhlen12,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_perm(vhlen13,vhlen13,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector signed int)vec_perm(vhlen21,vhlen22,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector signed int)vec_perm(vhlen23,vhlen23,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = (vector signed int)vec_perm(vhlen31,vhlen32,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = (vector signed int)vec_perm(vhlen33,vhlen33,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = (vector signed int)vec_cmpgt(vx,v31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = (vector signed char)vec_sel(v1,v2,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = (vector signed char)vec_sel(v3,v4,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = (vector signed char)vec_sel(v5,v6,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sum4s(vs1,vsum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sum4s(vs2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_sum4s(vs3,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sums(vsum1,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sums(vsum2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_sums(vsum3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(vsum1,vsum1,vec_lvsr(4,&sum1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_perm(vsum2,vsum2,vec_lvsr(4,&sum2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_perm(vsum3,vsum3,vec_lvsr(4,&sum3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum1,0,(signed int *)&sum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum2,0,(signed int *)&sum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum3,0,(signed int *)&sum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x = ix[0] * xlen + ix[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ normal:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x = ix[0] * xlen + ix[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ end:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum3) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1+2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from3_altivec2(const int *ix, const int *const end, int max, unsigned int *s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1]; /* number of the first of the three candidate tables */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words. Sums the code lengths of every (ix[0], ix[1]) pair in [ix, end) under ht[10], ht[11] and ht[12], adds the smallest total to *s and returns t1, t1+1 or t1+2 according to which total won. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum1 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum3 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 8; /* a pair indexes the length tables at ix[0] * xlen + ix[1] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen1 = ht[10].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen2 = ht[11].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen3 = ht[12].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vx1,vx2,vx3,vx4,vx5,vx6,vxlen,vzero,vsum1,vsum2,vsum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vmask,vperm1,vperm2,vx,v31;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vhlen11,vhlen12,vhlen13,vhlen14,vhlen21,vhlen22,vhlen23,vhlen24,vhlen31,vhlen32,vhlen33,vhlen34;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed char vs1,vs2,vs3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* vhlenNM is 16-entry slice M of the length table for candidate N (4 slices = 64 entries); presumably copies of hlen1, hlen2 and hlen3. TODO confirm against the ht[] definitions. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen11 = (vector unsigned char)VINIT16( 1, 4, 7, 9, 10, 10, 10, 11, 4, 6, 8, 9, 10, 11, 10, 10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen12 = (vector unsigned char)VINIT16( 7, 8, 9, 10, 11, 12, 11, 11, 8, 9, 10, 11, 12, 12, 11, 12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen13 = (vector unsigned char)VINIT16( 9, 10, 11, 12, 12, 12, 12, 12,10, 11, 12, 12, 13, 13, 12, 13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen14 = (vector unsigned char)VINIT16( 9, 10, 11, 12, 12, 12, 13, 13,10, 10, 11, 12, 12, 13, 13, 13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen21 = (vector unsigned char)VINIT16( 2, 4, 6, 8, 9, 10, 9, 10, 4, 5, 6, 8, 10, 10, 9, 10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen22 = (vector unsigned char)VINIT16( 6, 7, 8, 9, 10, 11, 10, 10, 8, 8, 9, 11, 10, 12, 10, 11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen23 = (vector unsigned char)VINIT16( 9, 10, 10, 11, 11, 12, 11, 12, 9, 10, 11, 12, 12, 13, 12, 13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen24 = (vector unsigned char)VINIT16( 9, 9, 9, 10, 11, 12, 12, 12, 9, 9, 10, 11, 12, 12, 12, 12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen31 = (vector unsigned char)VINIT16( 4, 4, 6, 8, 9, 10, 10, 10, 4, 5, 6, 7, 9, 9, 10, 10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen32 = (vector unsigned char)VINIT16( 6, 6, 7, 8, 9, 10, 9, 10, 7, 7, 8, 8, 9, 10, 10, 10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen33 = (vector unsigned char)VINIT16( 8, 8, 9, 9, 10, 10, 10, 11, 9, 9, 10, 10, 10, 11, 10, 11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhlen34 = (vector unsigned char)VINIT16( 9, 9, 9, 10, 10, 11, 11, 12,10, 10, 10, 11, 11, 11, 11, 12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = (vector unsigned char)VINIT16(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); /* words 0 and 2 of each input vector: the ix[0] of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = (vector unsigned char)VINIT16(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); /* words 1 and 3 of each input vector: the ix[1] of every pair */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v31 = (vector unsigned char)VINIT16ALL(31); /* threshold between the first and second 32-entry halves of a table */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxlen = vec_splat_s32(8); /* xlen in every 32-bit lane */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_splat_s32(0); /* explicit zero: xor-ing an uninitialized vector with itself reads an indeterminate value and is not guaranteed to yield 0 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_splat_s32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_splat_s32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_splat_s32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if((int)(end - ix) < 32) goto normal; /* fewer than 16 pairs: scalar path only */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //int *end2 = ix + 32*((int)(end - ix)/32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = vec_lvsl(0,ix); /* permute mask that realigns the data when ix is not 16-byte aligned */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(16,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(32,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(48,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_ld(64,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_ld(80,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_ld(96,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_ld(112,ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_ld(128,ix); /* NOTE(review): this is the block after the 32 ints consumed here; with a 16-byte aligned ix and exactly 32 ints left it lies entirely at or past end. Confirm the buffer is padded. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v0,v1,vmask); /* v9..v16: the 32 ints starting at ix, realigned */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v1,v2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v2,v3,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v3,v4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_perm(v4,v5,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v14 = vec_perm(v5,v6,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v15 = vec_perm(v6,v7,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v16 = vec_perm(v7,v8,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v8; /* carry the last loaded block into the next iteration */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_perm(v9,v10,vperm1); /* v1,v3,v5,v7 collect ix[0] of each pair; v2,v4,v6,v8 collect ix[1] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v9,v10,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v11,v12,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v11,v12,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v13,v14,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v13,v14,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v15,v16,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v15,v16,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* 16-bit multiply-add per lane yields ix[0] * xlen + ix[1]; the results are then narrowed to bytes. Presumably every ix value is small and non-negative here. TODO confirm with the caller. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx1 = (vector signed int)vec_mladd((vector unsigned short)v1,(vector unsigned short)vxlen,(vector unsigned short)v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx2 = (vector signed int)vec_mladd((vector unsigned short)v3,(vector unsigned short)vxlen,(vector unsigned short)v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx3 = (vector signed int)vec_pack(vx1,vx2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx4 = (vector signed int)vec_mladd((vector unsigned short)v5,(vector unsigned short)vxlen,(vector unsigned short)v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx5 = (vector signed int)vec_mladd((vector unsigned short)v7,(vector unsigned short)vxlen,(vector unsigned short)v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx6 = (vector signed int)vec_pack(vx4,vx5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = (vector unsigned char)vec_pack((vector unsigned short)vx3,(vector unsigned short)vx6); /* 16 table indices, one per byte */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = (vector signed int)vec_perm(vhlen11,vhlen12,vx); /* vec_perm looks at the low 5 bits of each index: lookup in entries 0 to 31 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = (vector signed int)vec_perm(vhlen13,vhlen14,vx); /* same low 5 bits applied to entries 32 to 63 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = (vector signed int)vec_perm(vhlen21,vhlen22,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector signed int)vec_perm(vhlen23,vhlen24,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = (vector signed int)vec_perm(vhlen31,vhlen32,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = (vector signed int)vec_perm(vhlen33,vhlen34,vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = (vector signed int)vec_cmpgt(vx,v31); /* all-ones bytes where the index is above 31 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = (vector signed char)vec_sel(v1,v2,(vector unsigned int)v7); /* take the upper-half lookup for those bytes, the lower-half lookup otherwise */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = (vector signed char)vec_sel(v3,v4,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs3 = (vector signed char)vec_sel(v5,v6,(vector unsigned int)v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sum4s(vs1,vsum1); /* add each group of four byte lengths into the matching 32-bit partial sum */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sum4s(vs2,vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_sum4s(vs3,vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 32; /* 16 pairs consumed */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while(ix < end-31);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_sums(vsum1,vzero); /* horizontal add of the four partial sums; vec_sums leaves the total in the last word */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_sums(vsum2,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_sums(vsum3,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* rotate that word into the lane vec_ste writes for the address of each scalar sum, then store it (this overwrites the initial 0) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vec_perm(vsum1,vsum1,vec_lvsr(4,&sum1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vec_perm(vsum2,vsum2,vec_lvsr(4,&sum2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vec_perm(vsum3,vsum3,vec_lvsr(4,&sum3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum1,0,(signed int *)&sum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum2,0,(signed int *)&sum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vsum3,0,(signed int *)&sum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (ix < end) { /* scalar tail for the pairs the vector loop left over */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x = ix[0] * xlen + ix[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto end;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ normal:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do { /* scalar-only path; being a do-while it always reads one pair, so presumably callers never pass ix == end. TODO confirm. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int x = ix[0] * xlen + ix[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ end:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum2) { /* strict comparison: a tie keeps the lower-numbered table */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum3) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1+2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum1; /* accumulate the smallest of the three totals */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*************************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* choose table */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -599,12 +1449,21 @@ typedef int (*count_fnc)(const int* ix, const int* end, int max, unsigned int* s
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const count_fnc count_fncs[] =
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ { &count_bit_null
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from2_altivec1
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from2_altivec2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_altivec1
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_altivec1
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_altivec2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_altivec2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -621,7 +1480,11 @@ choose_table_nonMMX(const int *ix, const int *const end, int *const _s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int* s = (unsigned int*)_s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int max;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int choice, choice2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ max = ix_max_vec(ix, end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ max = ix_max(ix, end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (max <= 15) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return count_fncs[max](ix, end, max, s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -643,7 +1506,11 @@ choose_table_nonMMX(const int *ix, const int *const end, int *const _s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ break;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__ALTIVEC__) && defined(_ARCH_PPC64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return count_bit_ESC_altivec(ix, end, choice, choice2, s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return count_bit_ESC(ix, end, choice, choice2, s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/util.c libmp3lame/util.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 43b457c..ea4b204 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/util.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/util.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -26,6 +26,10 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__ALTIVEC__) && !defined(_ARCH_PPC64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <float.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -954,6 +958,108 @@ disable_FPE(void)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ***********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__ALTIVEC__) && !defined(_ARCH_PPC64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline ieee754_float32_t fast_log10_altivec(ieee754_float32_t x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float va,vb,vc,vhalf,vzero,vsqrt2,vconst4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,vz,vz2,vlog;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vconst1,vconst2,vshamt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vconst3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float out __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = (vector float)VINIT4ALL(0.8685890659);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = (vector float)VINIT4ALL(0.2894672153);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc = (vector float)VINIT4ALL(0.1793365895);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = (vector float)VINIT4ALL(0.15051499783);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(1.4142135623731);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst4 = (vector float)VINIT4ALL(0.301029995664);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst1 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(9));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_nor(vconst2,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst3 = (vector signed int)vec_rl(vconst2,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vec_splat_u32(9),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vshamt,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_sl((vector unsigned int)vconst3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,&x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v1,v1,vec_lvsl(0,&x));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_splat(v2,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector float)vec_sel(vconst2,(vector unsigned int)v3,vconst1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz = vec_madd(v6, vec_madd(vec_nmsub(v7,v5,(vector float)vconst2),v7,v7), vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = (vector float)vec_sr((vector unsigned int)v3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_ctf(vec_sub((vector signed int)v8,vconst3),0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz2 = vec_madd(vz,vz,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vlog,vconst4,vhalf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vz2,vc,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vz2,v1,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vz,v2,vlog);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vlog,0,&out);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return out;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline ieee754_float32_t fast_loge_altivec(ieee754_float32_t x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float va,vb,vc,vhalf,vzero,vsqrt2,vconst4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v1,v2,v3,v4,v5,v6,v7,v8,vz,vz2,vlog;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vconst1,vconst2,vshamt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vconst3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float out __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va = (vector float)VINIT4ALL(2.0000006209);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb = (vector float)VINIT4ALL(0.6664778517);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc = (vector float)VINIT4ALL(0.4139745860);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vhalf = (vector float)VINIT4ALL(0.34657359028);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsqrt2 = (vector float)VINIT4ALL(1.4142135623731);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst4 = (vector float)VINIT4ALL(0.6931471805599);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst1 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(9));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = (vector unsigned int)vec_sr(vec_splat_s32(-1),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_nor(vconst2,vconst2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst3 = (vector signed int)vec_rl(vconst2,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vec_splat_u32(9),vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vshamt = vec_add(vshamt,vec_splat_u32(7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vconst2 = vec_sl((vector unsigned int)vconst3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,&x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v1,v1,vec_lvsl(0,&x));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_splat(v2,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = (vector float)vec_sel(vconst2,(vector unsigned int)v3,vconst1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_add(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_sub(v4,vsqrt2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz = vec_madd(v6, vec_madd(vec_nmsub(v7,v5,(vector float)vconst2),v7,v7), vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = (vector float)vec_sr((vector unsigned int)v3,vshamt);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_ctf(vec_sub((vector signed int)v8,vconst3),0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vz2 = vec_madd(vz,vz,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vlog,vconst4,vhalf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_madd(vz2,vc,vb);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_madd(vz2,v1,va);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vlog = vec_madd(vz,v2,vlog);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_ste(vlog,0,&out);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return out;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++init_log_table(void)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define LOG2_SIZE (512)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define LOG2_SIZE_L2 (9)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1004,6 +1110,8 @@ fast_log2(ieee754_float32_t x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return log2val;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #else /* Don't use FAST_LOG */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/util.h libmp3lame/util.h
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 13f0cd4..a0b3b55 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/util.h
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/util.h
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -93,10 +93,17 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* log/log10 approximations */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifdef USE_FAST_LOG
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__ALTIVEC__) && !defined(_ARCH_PPC64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define FAST_LOG10(x) (fast_log10_altivec(x))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define FAST_LOG(x) (fast_loge_altivec(x))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define FAST_LOG10_X(x,y) (fast_log10_altivec(x)*(y))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define FAST_LOG_X(x,y) (fast_loge_altivec(x)*(y))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG10(x) (fast_log2(x)*(LOG2/LOG10))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG(x) (fast_log2(x)*LOG2)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG10_X(x,y) (fast_log2(x)*(LOG2/LOG10*(y)))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG_X(x,y) (fast_log2(x)*(LOG2*(y)))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG10(x) log10(x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define FAST_LOG(x) log(x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -186,14 +193,14 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT masking_lower[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT masking_lower[CBANDS] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT minval[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT rnumlines[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT mld_cb[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT mld[Max(SBMAX_l,SBMAX_s)];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT bo_weight[Max(SBMAX_l,SBMAX_s)]; /* band weight long scalefactor bands, at transition */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT attack_threshold; /* short block tuning */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- int s3ind[CBANDS][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int s3ind[CBANDS][4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int numlines[CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int bm[Max(SBMAX_l,SBMAX_s)];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int bo[Max(SBMAX_l,SBMAX_s)];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -219,7 +226,7 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT nb_l1[4][CBANDS], nb_l2[4][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT nb_l1[4][CBANDS] __attribute__ ((aligned (16))), nb_l2[4][CBANDS] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT nb_s1[4][CBANDS], nb_s2[4][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ III_psy_xmin thm[4];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -246,7 +253,7 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* variables used by encoder.c */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* variables for newmdct.c */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT sb_sample[2][2][18][SBLIMIT];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT sb_sample[2][2][18][SBLIMIT] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT amp_filter[32];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* variables used by util.c */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -293,7 +300,7 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifndef MFSIZE
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # define MFSIZE ( 3*1152 + ENCDELAY - MDCTDELAY )
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- sample_t mfbuf[2][MFSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sample_t mfbuf[2][MFSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mf_samples_to_encode;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mf_size;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -567,7 +574,12 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* log/log10 approximations */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ extern void init_log_table(void);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__ALTIVEC__) && !defined(_ARCH_PPC64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ extern ieee754_float32_t fast_log10_altivec(ieee754_float32_t x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ extern ieee754_float32_t fast_loge_altivec(ieee754_float32_t x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ extern ieee754_float32_t fast_log2(ieee754_float32_t x);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int isResamplingNecessary(SessionConfig_t const* cfg);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/vbrquantize.c libmp3lame/vbrquantize.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 0f703b7..67029c4 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/vbrquantize.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/vbrquantize.c
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -26,6 +26,10 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <altivec.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -217,8 +221,23 @@ k_34_4(DOUBLEX x[4], int l3[4])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static FLOAT
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, unsigned int bw, uint8_t sf)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float vpow[8] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float v0, v1, v2, v3, v4, v5, v6,v7,v8,v9,v10,v11,v12,v13;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm1, vperm2,vc1,vc2,vc3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector signed int vl1,vl2,vl3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector float vxfsf, vsfpow, vsfpow34, vabs, vzero;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int s1,s2,s3,s4,s5,s6,s7,s8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float const1 = (vector float)VINIT4(0.4053964553387788,3.404263724373839,5.465086767819913,1.0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const vector float const2 = (vector float)VINIT4(7.719205369637751,10.93017829043677,0,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned int vmask1,vmask2,vmask3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vector unsigned char vperm3,vperm4,vc4,vc5,vc6,vmask;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ DOUBLEX x[4];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- int l3[4];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int l3[4] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const FLOAT sfpow = pow20[sf + Q_MAX2]; /*pow(2.0,sf/4.0); */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const FLOAT sfpow34 = ipow20[sf]; /*pow(sfpow,-3.0/4.0); */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -226,6 +245,239 @@ calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, unsigned int bw, uint8_
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int i = bw >> 2u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int const remaining = (bw & 0x03u);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[0] = sfpow;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[1] = sfpow34;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsfpow = vec_ld(0,vpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_xor(vxfsf,vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsfpow34 = vec_splat(vsfpow,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsfpow = vec_splat(vsfpow,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm1 = vec_lvsl(0,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm2 = vec_lvsl(0,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vec_ld(0,xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vabs = (vector float)vec_splat_s32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vabs = (vector float)vec_sl((vector unsigned int)vabs, (vector unsigned int)vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vzero = vec_xor(vzero,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_splat_u8(1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_splat_u8(5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_sl(vc1,vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = (vector unsigned int)vec_splat_s32(-1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask1 = vec_sro(vmask1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask2 = vec_sro(vmask1,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask3 = vec_sro(vmask2,vc3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm3 = (vector unsigned char)VINIT16(0,0,0,0,0,0,0,0,0,1,2,3,16,17,18,19);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vperm4 = vec_sld(vperm3,(vector unsigned char)vzero,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmask = (vector unsigned char)VINIT16ALL(16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; i > 1; i -= 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v1,v2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v2,v3,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_madd(v4,vsfpow34,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_madd(v5,vsfpow34,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_floor(v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_floor(v13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_splat(const1,2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_splat(const1,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_splat(const2,1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_splat(const2,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v2,v4,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v3,v4,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v2,v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v3,v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_splat(const1,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_splat(const1,3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_madd(v8,v2,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_madd(v9,v3,v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v10,v2,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v11,v3,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_re(v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_re(v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_nmsub(v10,v6,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_nmsub(v11,v7,v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v10,v6,v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v11,v7,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_madd(v8,v10,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_madd(v9,v11,v13);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl1 = vec_cts(v10,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl2 = vec_cts(v11,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl3 = (vector signed int)vec_pack(vl1,vl2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vl3,0,l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = l3[0] >> 16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = l3[0] & 0xffff;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s3 = l3[1] >> 16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s4 = l3[1] & 0xffff;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s5 = l3[2] >> 16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s6 = l3[2] & 0xffff;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s7 = l3[3] >> 16;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s8 = l3[3] & 0xffff;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+s1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+s2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+s3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+s4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v2,v2,vec_lvsl(0,pow43+s1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v3,v3,vec_lvsl(-4,pow43+s2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v4,v4,vec_lvsl(-8,pow43+s3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v5,vec_lvsl(-12,pow43+s4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sel(v2,v3,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sel(v12,v4,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sel(v12,v5,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+s5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+s6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+s7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+s8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_perm(v2,v2,vec_lvsl(0,pow43+s5));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v3,v3,vec_lvsl(-4,pow43+s6));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_perm(v4,v4,vec_lvsl(-8,pow43+s7));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_perm(v5,v5,vec_lvsl(-12,pow43+s8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sel(v2,v3,vmask1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sel(v13,v4,vmask2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sel(v13,v5,vmask3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_lvsl(0,pow43+s1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_lvsl(0,pow43+s2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_lvsl(0,pow43+s3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_lvsl(0,pow43+s4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_or(vc2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_or(vc4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+s1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+s2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+s3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+s4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_perm(vc1,vc2,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc6 = vec_perm(vc3,vc4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v3,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v4,v5,vc6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_sld(v6,v7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vec_lvsl(0,pow43+s5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_lvsl(0,pow43+s6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc3 = vec_lvsl(0,pow43+s7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_lvsl(0,pow43+s8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vec_or(vc2,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc4 = vec_or(vc4,vmask);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+s5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+s6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+s7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+s8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc5 = vec_perm(vc1,vc2,vperm3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc6 = vec_perm(vc3,vc4,vperm4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v3,vc5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v4,v5,vc6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v13 = vec_sld(v6,v7,8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16, xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_ld(32, xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v0,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v2,v3,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_andc(v6,vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_andc(v7,vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_nmsub(vsfpow, v12, v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_nmsub(vsfpow, v13, v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_madd(v10, v10, vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_madd(v11, v11, vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr34 += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (i) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[0] = sfpow34 * xr34[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[1] = sfpow34 * xr34[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[2] = sfpow34 * xr34[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[3] = sfpow34 * xr34[3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ k_34_4(x, l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[0] = pow43[l3[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[1] = pow43[l3[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[2] = pow43[l3[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vpow[3] = pow43[l3[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vec_ld(0, vpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16, xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v0,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_andc(v3,vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_nmsub(vsfpow, v1, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_madd(v5, v5, vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16,xr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v1,v2,vperm2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_madd(v3,vsfpow34,vzero);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl1 = vec_cts(v4,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vl1,0,l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,adj43+l3[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_lde(0,adj43+l3[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_lde(0,adj43+l3[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_lde(0,adj43+l3[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v5,v5,vec_lvsl(0,adj43+l3[0]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_perm(v6,v6,vec_lvsl(-4,adj43+l3[1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v11 = vec_perm(v7,v7,vec_lvsl(-8,adj43+l3[2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v12 = vec_perm(v8,v8,vec_lvsl(-12,adj43+l3[3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_or(v9,v10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_or(v9,v11);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_or(v9,v12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v10 = vec_add(v4,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vl1 = vec_cts(v10,0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vl1,0,l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_lde(0,pow43+l3[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_lde(0,pow43+l3[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_lde(0,pow43+l3[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_lde(0,pow43+l3[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_perm(v2,v2,vec_lvsl(0,pow43+l3[0]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = vec_perm(v3,v3,vec_lvsl(-4,pow43+l3[1]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v8 = vec_perm(v4,v4,vec_lvsl(-8,pow43+l3[2]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v9 = vec_perm(v5,v5,vec_lvsl(-12,pow43+l3[3]));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = vec_or(v6,v9);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vec_ld(16, xr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vec_perm(v0,v2,vperm1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vec_andc(v3,vabs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vec_nmsub(vsfpow, v6, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vxfsf = vec_madd(v5, v5, vxfsf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr34 += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (remaining) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[0] = x[1] = x[2] = x[3] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ switch( remaining ) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 3: x[2] = sfpow34 * xr34[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 2: x[1] = sfpow34 * xr34[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 1: x[0] = sfpow34 * xr34[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ k_34_4(x, l3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x[0] = x[1] = x[2] = x[3] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ switch( remaining ) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 3: x[2] = fabsf(xr[2]) - sfpow * pow43[l3[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 2: x[1] = fabsf(xr[1]) - sfpow * pow43[l3[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ case 1: x[0] = fabsf(xr[0]) - sfpow * pow43[l3[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xfsf += (x[0] * x[0] + x[1] * x[1]) + (x[2] * x[2] + x[3] * x[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vec_st(vxfsf,0,vpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return xfsf + vpow[0] + vpow[1] + vpow[2] + vpow[3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (i-- > 0) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[0] = sfpow34 * xr34[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[1] = sfpow34 * xr34[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -262,6 +514,7 @@ calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, unsigned int bw, uint8_
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ xfsf += (x[0] * x[0] + x[1] * x[1]) + (x[2] * x[2] + x[3] * x[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return xfsf;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git libmp3lame/version.h libmp3lame/version.h
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index f5fef50..69edd42 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/version.h
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/version.h
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -31,13 +31,20 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # define STR(x) __STR(x)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-# define LAME_URL "http://lame.sf.net"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if __ALTIVEC__
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if _ARCH_PPC64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++# define LAME_URL "+VMX+970 http://www.floodgap.com/software/lamevmx/"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++# define LAME_URL "+VMX http://www.floodgap.com/software/lamevmx/"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++# define LAME_URL "G3 http://www.floodgap.com/software/lamevmx/"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # define LAME_MAJOR_VERSION 3 /* Major version number */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # define LAME_MINOR_VERSION 100 /* Minor version number */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # define LAME_TYPE_VERSION 2 /* 0:alpha 1:beta 2:release */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-# define LAME_PATCH_VERSION 0 /* Patch level */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++# define LAME_PATCH_VERSION 1 /* Patch level */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # define LAME_ALPHA_VERSION (LAME_TYPE_VERSION==0)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # define LAME_BETA_VERSION (LAME_TYPE_VERSION==1)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # define LAME_RELEASE_VERSION (LAME_TYPE_VERSION==2)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git ltmain.sh ltmain.sh
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index 0f0a2da..acc8382 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- ltmain.sh
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ ltmain.sh
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -5103,7 +5103,7 @@ func_extract_an_archive ()
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ :
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++: # func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fi
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+diff --git mpglib/Makefile.in mpglib/Makefile.in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+index edc519f..7284783 100644
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- mpglib/Makefile.in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ mpglib/Makefile.in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -426,22 +426,22 @@ distclean-compile:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tabinit.Plo@am__quote@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ .c.o:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ .c.obj:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ .c.lo:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
</span><span style='display:block; white-space:pre;color:#808080;'>diff --git a/audio/lame/files/lame-3.100-neon-20230418.diff b/audio/lame/files/lame-3.100-neon-20230418.diff
</span>new file mode 100644
<span style='display:block; white-space:pre;color:#808080;'>index 00000000000..d3f9569e43d
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>--- /dev/null
</span><span style='display:block; white-space:pre;background:#e0e0ff;'>+++ b/audio/lame/files/lame-3.100-neon-20230418.diff
</span><span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -0,0 +1,1593 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+# https://tmkk.undo.jp/lame/lame-3.100-neon-20230418.diff
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/fft.c.orig 2017-09-07 04:33:36
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/fft.c 2023-04-12 19:30:22
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -45,9 +45,18 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "fft.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "vector/lame_intrin.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include &lt;arm_neon.h&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__aarch64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vcopyq_laneq_f32(a, lane1, b, lane2) vsetq_lane_f32(vgetq_lane_f32(b, lane2), a, lane1)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__ARM_FEATURE_FMA)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_f32 vmlaq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmsq_f32 vmlsq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define TRI_SIZE (5-1) /* 1024 = 4**5 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* fft.c */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -103,6 +112,9 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } while (fi < fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ c1 = tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ s1 = tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (kx < 4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 1; i < kx; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT c2, s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ c2 = 1 - (2 * s1) * s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -142,6 +154,143 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } else { /* kx is 4 or more: process i = 1..kx-1 four values at a time */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT c2, s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float cs[16] __attribute__ ((aligned (16))); /* staging for four lanes each of c1 [0..3], c2 [4..7], s1 [8..11], s2 [12..15] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vc1, vc2, vs1, vs2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(i = 1; i < 4; i++) { /* fill lanes 1..3 of the first group; lane 0 is set separately below */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = 1 - (2*s1)*s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = (2*s1)*c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cs[i] = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cs[i+4] = c2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cs[i+8] = s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cs[i+12] = s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = c1; /* c2 is reused as a copy of the old c1 for the update of c1 and s1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cs[0] = cs[4] = cs[8] = cs[12] = 0; /* lane 0 is a placeholder: its results are replaced by the lane-0 copies before each store */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vld1q_f32(cs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vld1q_f32(cs+4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = vld1q_f32(cs+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = vld1q_f32(cs+12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi = fz; /* lane j of the fi vectors is fi[j] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi = fz + k1; /* lane j of the gi vectors is gi[-j] once reversed */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vfi0, vfi1, vfi2, vfi3, vgi0, vgi1, vgi2, vgi3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t va0, va1, vb0, vb1, vf0, vf1, vf2, vf3, vg0, vg1, vg2, vg3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi0 = vld1q_f32(fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi1 = vld1q_f32(fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi2 = vld1q_f32(fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi3 = vld1q_f32(fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vrev64q_f32(vld1q_f32(gi-3)); /* vrev64q here plus vextq by 2 below reverses all four lanes of gi[-3..0] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vrev64q_f32(vld1q_f32(gi+k1-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vrev64q_f32(vld1q_f32(gi+k2-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vrev64q_f32(vld1q_f32(gi+k3-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vextq_f32(vgi0, vgi0, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vextq_f32(vgi1, vgi1, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vextq_f32(vgi2, vgi2, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vextq_f32(vgi3, vgi3, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va0 = vfmaq_f32(vmulq_f32(vfi1, vc2), vgi1, vs2); /* fi[k1]*c2 + gi[k1]*s2 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb0 = vfmsq_f32(vmulq_f32(vfi1, vs2), vgi1, vc2); /* fi[k1]*s2 - gi[k1]*c2 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va1 = vfmaq_f32(vmulq_f32(vfi3, vc2), vgi3, vs2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb1 = vfmsq_f32(vmulq_f32(vfi3, vs2), vgi3, vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf0 = vaddq_f32(vfi0, va0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vsubq_f32(vfi0, va0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg0 = vaddq_f32(vgi0, vb0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg1 = vsubq_f32(vgi0, vb0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vaddq_f32(vfi2, va1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vsubq_f32(vfi2, va1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg2 = vaddq_f32(vgi2, vb1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg3 = vsubq_f32(vgi2, vb1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va0 = vfmaq_f32(vmulq_f32(vf2, vc1), vg3, vs1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb0 = vfmsq_f32(vmulq_f32(vf2, vs1), vg3, vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va1 = vfmaq_f32(vmulq_f32(vg2, vs1), vf3, vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb1 = vfmsq_f32(vmulq_f32(vg2, vc1), vf3, vs1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fi, vcopyq_laneq_f32(vaddq_f32(vf0, va0), 0, vfi0, 0)); /* lane 0 is put back to the value that was loaded, so element 0 is stored unchanged */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fi+k1, vcopyq_laneq_f32(vaddq_f32(vf1, vb1), 0, vfi1, 0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fi+k2, vcopyq_laneq_f32(vsubq_f32(vf0, va0), 0, vfi2, 0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fi+k3, vcopyq_laneq_f32(vsubq_f32(vf1, vb1), 0, vfi3, 0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vrev64q_f32(vcopyq_laneq_f32(vaddq_f32(vg0, va1), 0, vgi0, 0)); /* same lane-0 restore, then the reversal is undone for the store at gi-3 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vrev64q_f32(vcopyq_laneq_f32(vaddq_f32(vg1, vb0), 0, vgi1, 0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vrev64q_f32(vcopyq_laneq_f32(vsubq_f32(vg0, va1), 0, vgi2, 0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vrev64q_f32(vcopyq_laneq_f32(vsubq_f32(vg1, vb0), 0, vgi3, 0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(gi-3, vextq_f32(vgi0, vgi0, 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(gi+k1-3, vextq_f32(vgi1, vgi1, 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(gi+k2-3, vextq_f32(vgi2, vgi2, 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(gi+k3-3, vextq_f32(vgi3, vgi3, 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (fi<fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 4; i < kx; i += 4) { /* remaining groups: all four lanes are live, so the stores need no lane-0 copy */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(j = 0; j < 4; j++) { /* continue the c1/s1 sequence for lanes 0..3 of this group */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = 1 - (2*s1)*s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s2 = (2*s1)*c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cs[j] = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cs[j+4] = c2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cs[j+8] = s1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cs[j+12] = s2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c2 = c1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c1 = c2 * tri[0] - s1 * tri[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ s1 = c2 * tri[1] + s1 * tri[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc1 = vld1q_f32(cs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vc2 = vld1q_f32(cs+4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs1 = vld1q_f32(cs+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs2 = vld1q_f32(cs+12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi = fz + i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi = fz + k1 - i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vfi0, vfi1, vfi2, vfi3, vgi0, vgi1, vgi2, vgi3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t va0, va1, vb0, vb1, vf0, vf1, vf2, vf3, vg0, vg1, vg2, vg3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi0 = vld1q_f32(fi);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi1 = vld1q_f32(fi+k1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi2 = vld1q_f32(fi+k2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vfi3 = vld1q_f32(fi+k3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vrev64q_f32(vld1q_f32(gi-3)); /* load gi[-3..0]; reversed by this and the vextq below so lane j holds gi[-j] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vrev64q_f32(vld1q_f32(gi+k1-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vrev64q_f32(vld1q_f32(gi+k2-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vrev64q_f32(vld1q_f32(gi+k3-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vextq_f32(vgi0, vgi0, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vextq_f32(vgi1, vgi1, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vextq_f32(vgi2, vgi2, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vextq_f32(vgi3, vgi3, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va0 = vfmaq_f32(vmulq_f32(vfi1, vc2), vgi1, vs2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb0 = vfmsq_f32(vmulq_f32(vfi1, vs2), vgi1, vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va1 = vfmaq_f32(vmulq_f32(vfi3, vc2), vgi3, vs2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb1 = vfmsq_f32(vmulq_f32(vfi3, vs2), vgi3, vc2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf0 = vaddq_f32(vfi0, va0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf1 = vsubq_f32(vfi0, va0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg0 = vaddq_f32(vgi0, vb0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg1 = vsubq_f32(vgi0, vb0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf2 = vaddq_f32(vfi2, va1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vf3 = vsubq_f32(vfi2, va1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg2 = vaddq_f32(vgi2, vb1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vg3 = vsubq_f32(vgi2, vb1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va0 = vfmaq_f32(vmulq_f32(vf2, vc1), vg3, vs1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb0 = vfmsq_f32(vmulq_f32(vf2, vs1), vg3, vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ va1 = vfmaq_f32(vmulq_f32(vg2, vs1), vf3, vc1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vb1 = vfmsq_f32(vmulq_f32(vg2, vc1), vf3, vs1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fi, vaddq_f32(vf0, va0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fi+k1, vaddq_f32(vf1, vb1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fi+k2, vsubq_f32(vf0, va0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fi+k3, vsubq_f32(vf1, vb1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi0 = vrev64q_f32(vaddq_f32(vg0, va1)); /* undo the lane reversal before storing back at gi-3 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi1 = vrev64q_f32(vaddq_f32(vg1, vb0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi2 = vrev64q_f32(vsubq_f32(vg0, va1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vgi3 = vrev64q_f32(vsubq_f32(vg1, vb0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(gi-3, vextq_f32(vgi0, vgi0, 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(gi+k1-3, vextq_f32(vgi1, vgi1, 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(gi+k2-3, vextq_f32(vgi2, vgi2, 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(gi+k3-3, vextq_f32(vgi3, vgi3, 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fi += k4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ } while (fi<fn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ tri += 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } while (k4 < n);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/gain_analysis.c.orig 2017-10-11 04:08:39
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/gain_analysis.c 2023-04-12 19:32:02
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -99,6 +99,20 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "gain_analysis.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__) /* NEON build, 32-bit or 64-bit ARM */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <arm_neon.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__aarch64__) /* not AArch64: define the helpers below locally */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vaddvq_f32(a) ({ /* sum of the four lanes of a; uses a GNU statement expression */ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x2_t b = vtrnq_f32(a, a); /* val[0] = {a0,a0,a2,a2}, val[1] = {a1,a1,a3,a3} */ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t c = vaddq_f32(b.val[0], b.val[1]); /* {a0+a1, a0+a1, a2+a3, a2+a3} */ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vget_lane_f32(vadd_f32(vget_high_f32(c), vget_low_f32(c)), 0); /* lane 0 = a0+a1+a2+a3 */ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__ARM_FEATURE_FMA) /* no fused multiply-add: map to the multiply-accumulate intrinsics */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_f32 vmlaq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmsq_f32 vmlsq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif /* !__ARM_FEATURE_FMA */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif /* !__aarch64__ */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif /* __aarch64__ || __arm__ */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* for each filter: */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* [0] 48 kHz, [1] 44.1 kHz, [2] 32 kHz, [3] 24 kHz, [4] 22050 Hz, [5] 16 kHz, [6] 12 kHz, [7] is 11025 Hz, [8] 8 kHz */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -109,6 +123,33 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*lint -save -e736 loss of precision */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const Float_t ABYule[9][multiple_of(4, 2 * YULE_ORDER + 1)] __attribute__ ((aligned (16))) = { /* NEON layout per row: 11 values + 1 zero pad, then 10 values + 2 zero pads, i.e. six 4-float groups as loaded by filterIntegrated from kernel1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* 20 18 16 14 12 10 8 6 4 2 0 19 17 15 13 11 9 7 5 3 1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ { 0.00288463683916, 0.00012025322027, 0.00306428023191, 0.00594298065125, -0.02074045215285, 0.02161526843274, -0.01655260341619, -0.00009291677959, -0.00123395316851, -0.02160367184185, 0.03857599435200, 0, 0.13919314567432, -0.86984376593551, 2.75465861874613, -5.87257861775999, 9.48293806319790,-12.28759895145294, 13.05504219327545,-11.34170355132042, 7.81501653005538, -3.84664617118067, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {-0.00187763777362, 0.00674613682247, -0.00240879051584, 0.01624864962975, -0.02596338512915, 0.02245293253339, -0.00834990904936, -0.00851165645469, -0.00848709379851, -0.02911007808948, 0.05418656406430, 0, 0.13149317958808, -0.75104302451432, 2.19611684890774, -4.39470996079559, 6.85401540936998, -8.81498681370155, 9.47693607801280, -8.54751527471874, 6.36317777566148, -3.47845948550071, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {-0.00881362733839, 0.00651420667831, -0.01390589421898, 0.03174092540049, 0.00222312597743, 0.04781476674921, -0.05588393329856, 0.02163541888798, -0.06247880153653, -0.09331049056315, 0.15457299681924, 0, 0.02347897407020, -0.05032077717131, 0.16378164858596, -0.45953458054983, 1.00595954808547, -1.67148153367602, 2.23697657451713, -2.64577170229825, 2.84868151156327, -2.37898834973084, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {-0.02950134983287, 0.00205861885564, -0.00000828086748, 0.06276101321749, -0.00584456039913, -0.02364141202522, -0.00915702933434, 0.03282930172664, -0.08587323730772, -0.22613988682123, 0.30296907319327, 0, 0.00302439095741, 0.02005851806501, 0.04500235387352, -0.22138138954925, 0.39120800788284, -0.22638893773906, -0.16276719120440, -0.25656257754070, 1.07977492259970, -1.61273165137247, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {-0.01760176568150, -0.01635381384540, 0.00832043980773, 0.05724228140351, -0.00589500224440, -0.00469977914380, -0.07834489609479, 0.11921148675203, -0.11828570177555, -0.25572241425570, 0.33642304856132, 0, 0.02977207319925, -0.04237348025746, 0.08333755284107, -0.04067510197014, -0.12453458140019, 0.47854794562326, -0.80774944671438, 0.12205022308084, 0.87350271418188, -1.49858979367799, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ { 0.00541907748707, -0.03193428438915, -0.01863887810927, 0.10478503600251, 0.04097565135648, -0.12398163381748, 0.04078262797139, -0.01419140100551, -0.22784394429749, -0.14351757464547, 0.44915256608450, 0, 0.03222754072173, 0.05784820375801, 0.06747620744683, 0.00613424350682, 0.22199650564824, -0.42029820170918, 0.00213767857124, -0.37256372942400, 0.29661783706366, -0.62820619233671, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {-0.00588215443421, -0.03788984554840, 0.08647503780351, 0.00647310677246, -0.27562961986224, 0.30931782841830, -0.18901604199609, 0.16744243493672, 0.16242137742230, -0.75464456939302, 0.56619470757641, 0, 0.01807364323573, 0.01639907836189, -0.04784254229033, 0.06739368333110, -0.33032403314006, 0.45054734505008, 0.00819999645858, -0.26806001042947, 0.29156311971249, -1.04800335126349, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {-0.00749618797172, -0.03721611395801, 0.06920467763959, 0.01628462406333, -0.25344790059353, 0.15558449135573, 0.02377945217615, 0.17520704835522, -0.14289799034253, -0.53174909058578, 0.58100494960553, 0, 0.01818801111503, 0.02442357316099, -0.02505961724053, -0.05246019024463, -0.23313271880868, 0.38952639978999, 0.14728154134330, -0.20256413484477, -0.31863563325245, -0.51035327095184, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {-0.02217936801134, 0.04788665548180, -0.04060034127000, -0.11202315195388, -0.02459864859345, 0.14590772289388, -0.10214864179676, 0.04267842219415, -0.00275953611929, -0.42163034350696, 0.53648789255105, 0, 0.04704409688120, 0.05477720428674, -0.18823009262115, -0.17556493366449, 0.15113130533216, 0.26408300200955, -0.04678328784242, -0.03424681017675, -0.43193942311114, -0.25049871956020, 0, 0}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const Float_t ABButter[9][multiple_of(4, 2 * BUTTER_ORDER + 1)] __attribute__ ((aligned (16))) = { /* NEON layout per row: 3 values + 1 zero pad, then 2 values + 2 zero pads, i.e. two 4-float groups as loaded by filterIntegrated from kernel2 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* 5 3 1 4 2 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.98621192462708, -1.97242384925416, 0.98621192462708, 0, 0.97261396931306, -1.97223372919527, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.98500175787242, -1.97000351574484, 0.98500175787242, 0, 0.97022847566350, -1.96977855582618, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97938932735214, -1.95877865470428, 0.97938932735214, 0, 0.95920349965459, -1.95835380975398, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97531843204928, -1.95063686409857, 0.97531843204928, 0, 0.95124613669835, -1.95002759149878, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97316523498161, -1.94633046996323, 0.97316523498161, 0, 0.94705070426118, -1.94561023566527, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.96454515552826, -1.92909031105652, 0.96454515552826, 0, 0.93034775234268, -1.92783286977036, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.96009142950541, -1.92018285901082, 0.96009142950541, 0, 0.92177618768381, -1.91858953033784, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.95856916599601, -1.91713833199203, 0.95856916599601, 0, 0.91885558323625, -1.91542108074780, 0, 0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.94597685600279, -1.89195371200558, 0.94597685600279, 0, 0.89487434461664, -1.88903307939452, 0, 0}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const Float_t ABYule[9][multiple_of(4, 2 * YULE_ORDER + 1)] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* 20 18 16 14 12 10 8 6 4 2 0 19 17 15 13 11 9 7 5 3 1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ { 0.00288463683916, 0.00012025322027, 0.00306428023191, 0.00594298065125, -0.02074045215285, 0.02161526843274, -0.01655260341619, -0.00009291677959, -0.00123395316851, -0.02160367184185, 0.03857599435200, 0.13919314567432, -0.86984376593551, 2.75465861874613, -5.87257861775999, 9.48293806319790,-12.28759895145294, 13.05504219327545,-11.34170355132042, 7.81501653005538, -3.84664617118067},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -134,6 +175,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0.95856916599601, 0.91885558323625, -1.91713833199203, -1.91542108074780, 0.95856916599601},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0.94597685600279, 0.89487434461664, -1.89195371200558, -1.88903307939452, 0.94597685600279}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*lint -restore */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -143,7 +185,62 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* When calling this procedure, make sure that ip[-order] and op[-order] point to real data! */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++filterIntegrated(const Float_t * input, Float_t * output1, Float_t * output2, size_t nSamples, const Float_t * const kernel1, const Float_t * const kernel2) /* NEON: both filter stages in one pass; input filtered with kernel1 gives output1, output1 filtered with kernel2 gives output2 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{ /* history read before the first sample: input[-10..-1], output1[-10..-1], output2[-2..-1] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vk1 = vld1q_f32(kernel1); /* vk1..vk3 = kernel1[0..11], multiplied with the 12-lane input window */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vk2 = vld1q_f32(kernel1+4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vk3 = vld1q_f32(kernel1+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vk4 = vld1q_f32(kernel1+12); /* vk4..vk6 = kernel1[12..23], multiplied with the output1 history */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vk5 = vld1q_f32(kernel1+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vk6 = vld1q_f32(kernel1+20);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vk7 = vld1q_f32(kernel2); /* vk7 = kernel2[0..3], multiplied with the newest output1 values */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vk8 = vld1q_f32(kernel2+4); /* vk8 = kernel2[4..7], multiplied with the output2 history */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vi1 = vld1q_f32(input-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vi2 = vld1q_f32(input-6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vi3 = vld1q_lane_f32(input, vcombine_f32(vld1_f32(input-2), vdup_n_f32(0)), 2); /* lane 2 must hold input[0]: the goto below skips the in-loop load, and with a zero here the first sample of every call was filtered as 0 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vo1 = vld1q_f32(output1-10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vo2 = vld1q_f32(output1-6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vo3 = vcombine_f32(vld1_f32(output1-2), vdup_n_f32(0)); /* lane 2 is filled with the stage-1 result once it is computed */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vo4 = vcombine_f32(vld1_f32(output2-2), vdup_n_f32(0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (nSamples == 0) return; /* the loop tests the count only after a full pass, so an empty request must leave here */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ goto start; /* first sample: the windows are already in place, skip the shift */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (1) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vsum1, vsum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi1 = vextq_f32(vi1, vi2, 1); /* slide the input window by one sample */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi2 = vextq_f32(vi2, vi3, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vld1q_lane_f32(input, vi3, 3); /* the new sample enters at lane 3 and is rotated into lane 2 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vi3 = vextq_f32(vi3, vi3, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo1 = vextq_f32(vo1, vo2, 1); /* slide the output1 history the same way */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo2 = vextq_f32(vo2, vo3, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vextq_f32(vo3, vo3, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo4 = vextq_f32(vo4, vo4, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++start:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vmulq_f32( vi1, vk1); /* vsum1 accumulates the input terms, vsum2 the output1 feedback terms */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vmulq_f32( vo1, vk4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vfmaq_f32(vsum1, vi2, vk2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vfmaq_f32(vsum2, vo2, vk5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vfmaq_f32(vsum1, vi3, vk3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vfmaq_f32(vsum2, vo3, vk6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vsubq_f32(vsum1, vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vfmsq_f32(vdupq_n_f32(0), vo4, vk8); /* stage 2 starts from minus the output2 feedback terms */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float out = vaddvq_f32(vsum1); /* stage-1 result for this sample */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo3 = vsetq_lane_f32(out, vo3, 2); /* becomes the newest entry of the output1 window used by stage 2 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ output1[0] = out;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vfmaq_f32(vsum2, vo3, vk7);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ out = vaddvq_f32(vsum2); /* stage-2 result for this sample */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vo4 = vsetq_lane_f32(out, vo4, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ output2[0] = out;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++output1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++output2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ++input;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (--nSamples == 0) break;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ filterYule(const Float_t * input, Float_t * output, size_t nSamples, const Float_t * const kernel)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (nSamples--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -188,6 +285,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ++input;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -323,6 +421,12 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ curright = right_samples + cursamplepos;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ filterIntegrated(curleft, rgData->lstep + rgData->totsamp, rgData->lout + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ABYule[rgData->freqindex], ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ filterIntegrated(curright, rgData->rstep + rgData->totsamp, rgData->rout + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ABYule[rgData->freqindex], ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ YULE_FILTER(curleft, rgData->lstep + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ABYule[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ YULE_FILTER(curright, rgData->rstep + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -332,6 +436,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ BUTTER_FILTER(rgData->rstep + rgData->totsamp, rgData->rout + rgData->totsamp, cursamples,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ABButter[rgData->freqindex]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ curleft = rgData->lout + rgData->totsamp; /* Get the squared values */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ curright = rgData->rout + rgData->totsamp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/newmdct.c.orig 2011-05-08 01:05:17
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/newmdct.c 2023-04-12 19:33:49
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -35,6 +35,13 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "encoder.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "util.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "newmdct.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <arm_neon.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__aarch64__) && !defined(__ARM_FEATURE_FMA)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_f32 vmlaq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmsq_f32 vmlsq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -435,6 +442,95 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const sample_t *x2 = &x1[238 - 14 - 286];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 0; i < 16; i+=4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x4_t vw;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vs, vt, vx;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp-10, vw, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+ 8, vw, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+26, vw, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+44, vw, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1+224-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vmulq_f32( vld1q_f32(x2-224 ), vw.val[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vmulq_f32( vextq_f32(vx, vx, 2), vw.val[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1+160-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vld1q_f32(x2-160 ), vw.val[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmaq_f32(vt, vextq_f32(vx, vx, 2), vw.val[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1+ 96-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vld1q_f32(x2- 96 ), vw.val[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmaq_f32(vt, vextq_f32(vx, vx, 2), vw.val[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1+ 32-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vld1q_f32(x2- 32 ), vw.val[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmaq_f32(vt, vextq_f32(vx, vx, 2), vw.val[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp- 6, vw, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+12, vw, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+30, vw, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+48, vw, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1- 32-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vld1q_f32(x2+ 32 ), vw.val[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmaq_f32(vt, vextq_f32(vx, vx, 2), vw.val[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1- 96-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vld1q_f32(x2+ 96 ), vw.val[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmaq_f32(vt, vextq_f32(vx, vx, 2), vw.val[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1-160-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vld1q_f32(x2+160 ), vw.val[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmaq_f32(vt, vextq_f32(vx, vx, 2), vw.val[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1-224-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vld1q_f32(x2+224 ), vw.val[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmaq_f32(vt, vextq_f32(vx, vx, 2), vw.val[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp- 2, vw, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+16, vw, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+34, vw, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+52, vw, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1-256-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmsq_f32(vt, vld1q_f32(x2+256 ), vw.val[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vextq_f32(vx, vx, 2), vw.val[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1-192-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmsq_f32(vt, vld1q_f32(x2+192 ), vw.val[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vextq_f32(vx, vx, 2), vw.val[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1-128-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmsq_f32(vt, vld1q_f32(x2+128 ), vw.val[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vextq_f32(vx, vx, 2), vw.val[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1- 64-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmsq_f32(vt, vld1q_f32(x2+ 64 ), vw.val[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vextq_f32(vx, vx, 2), vw.val[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+ 2, vw, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+20, vw, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+38, vw, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw = vld4q_lane_f32(wp+56, vw, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1+ 0-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmsq_f32(vt, vld1q_f32(x2- 0 ), vw.val[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vextq_f32(vx, vx, 2), vw.val[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1+ 64-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmsq_f32(vt, vld1q_f32(x2- 64 ), vw.val[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vextq_f32(vx, vx, 2), vw.val[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1+128-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmsq_f32(vt, vld1q_f32(x2-128 ), vw.val[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vextq_f32(vx, vx, 2), vw.val[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vrev64q_f32( vld1q_f32(x1+192-3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt = vfmsq_f32(vt, vld1q_f32(x2-192 ), vw.val[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vfmaq_f32(vs, vextq_f32(vx, vx, 2), vw.val[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x2_t vw2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vld2q_lane_f32(wp+ 6, vw2, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vld2q_lane_f32(wp+24, vw2, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vld2q_lane_f32(wp+42, vw2, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2 = vld2q_lane_f32(wp+60, vw2, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vs = vmulq_f32(vs, vw2.val[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vx = vsubq_f32(vt, vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2.val[0] = vaddq_f32(vt, vs);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vw2.val[1] = vmulq_f32(vx, vw2.val[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst2q_f32(a+i*2 ,vw2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1 -= 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2 += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wp += 18*4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x1++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x2--;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wp -= 18;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = -15; i < 0; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT w, s, t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -501,6 +597,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x1--;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x2++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT s, t, u, v;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ t = x1[-16] * wp[-10];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/psymodel.c.orig 2017-09-07 04:38:23
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/psymodel.c 2023-04-12 19:48:58
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -154,6 +154,24 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame_global_flags.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "fft.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame-analysis.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <arm_neon.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__aarch64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vcopyq_laneq_f32(a, lane1, b, lane2) vsetq_lane_f32(vgetq_lane_f32(b, lane2), a, lane1)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vaddvq_f32(a) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x2_t b = vtrnq_f32(a, a); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t c = vaddq_f32(b.val[0], b.val[1]); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vget_lane_f32(vadd_f32(vget_high_f32(c), vget_low_f32(c)), 0); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__ARM_FEATURE_FMA)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_f32 vmlaq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmsq_f32 vmlsq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_n_f32 vmlaq_n_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#elif !defined(__clang__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_n_f32 vmlaq_n_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define NSFIRLEN 21
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -662,6 +680,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2], int chn,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int gr_out, FLOAT fftenergy[HBLKSIZE], FLOAT(*wsamp_l)[BLKSIZE])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -691,6 +710,192 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[0] = wsamp_l[0][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[0] *= fftenergy[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t venergy = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (j = 0; j < 64; j++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v0 = vld1q_f32(wsamp_l[0]+j*8+1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v1 = vld1q_f32(wsamp_l[0]+j*8+5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v2 = vrev64q_f32(vld1q_f32(wsamp_l[0]-j*8+1020));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v3 = vrev64q_f32(vld1q_f32(wsamp_l[0]-j*8+1016));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vextq_f32(v2, v2, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vextq_f32(v3, v3, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vmulq_f32(v0, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vmulq_f32(v1, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vfmaq_f32(v0, v2, v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vfmaq_f32(v1, v3, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vmulq_n_f32(v0, 0.5f);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vmulq_n_f32(v1, 0.5f);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ venergy = vaddq_f32(venergy, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ venergy = vaddq_f32(venergy, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fftenergy+j*8+1, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fftenergy+j*8+5, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* total energy */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT totalenergy = vaddvq_f32(venergy) - fftenergy[512];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (j = 1; j < 11; j++)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ totalenergy -= fftenergy[j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ psv->tot_ener[chn] = totalenergy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (plt) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (j = 0; j < HBLKSIZE; j++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ plt->energy[gr_out][chn][j] = plt->energy_save[chn][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ plt->energy_save[chn][j] = fftenergy[j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++vbrpsy_compute_fft_l_js(lame_internal_flags * gfc, const sample_t * const buffer[2],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int gr_out, FLOAT fftenergy_m[HBLKSIZE], FLOAT fftenergy_s[HBLKSIZE], FLOAT(*wsamp_l)[BLKSIZE])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ PsyStateVar_t *psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ * compute energies
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* FFT data for mid and side channel is derived from L & R */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v0, v1, v2, v3, v4, v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t venergy_m = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t venergy_s = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* 1st loop : wsamp_l[*][0] .. wsamp_l[*][3], wsamp_l[*][1021] .. wsamp_l[*][1023] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vld1q_f32(wsamp_l[0]); /* {[0][0], [0][1], [0][2], [0][3]} */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vld1q_f32(wsamp_l[1]); /* {[1][0], [1][1], [1][2], [1][3]} */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vcombine_f32(vdup_n_f32(0), vrev64_f32(vld1_f32(wsamp_l[0]+1021))); /* {0, 0, [0][1022], [0][1021]} */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vld1q_lane_f32(wsamp_l[0]+1023, v2, 1); /* {0, [0][1023], [0][1022], [0][1021]} */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vcombine_f32(vdup_n_f32(0), vrev64_f32(vld1_f32(wsamp_l[1]+1021))); /* {0, 0, [1][1022], [1][1021]} */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vld1q_lane_f32(wsamp_l[1]+1023, v3, 1); /* {0, [1][1023], [1][1022], [1][1021]} */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vaddq_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vaddq_f32(v2, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vsubq_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vsubq_f32(v2, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vmulq_n_f32(v4, sqrt2_half);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vmulq_n_f32(v5, sqrt2_half);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vmulq_n_f32(v0, sqrt2_half);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vmulq_n_f32(v2, sqrt2_half);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*vst1q_f32(wsamp_l[0], v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(wsamp_l[1], v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vrev64q_f32(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vrev64q_f32(v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vextq_f32(v1, v1, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vextq_f32(v3, v3, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1_f32(wsamp_l[0]+1021, vget_low_f32(v1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1_f32(wsamp_l[1]+1021, vget_low_f32(v3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_lane_f32(wsamp_l[0]+1023, v1, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_lane_f32(wsamp_l[1]+1023, v3, 2);*/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vcopyq_laneq_f32(v5, 0, v4, 0); /* {[0][0], [0][1023], [0][1022], [0][1021]} */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vcopyq_laneq_f32(v2, 0, v0, 0); /* {[1][0], [1][1023], [1][1022], [1][1021]} */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vmulq_f32(v4, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vmulq_f32(v0, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vfmaq_f32(v4, v5, v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vfmaq_f32(v0, v2, v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vmulq_n_f32(v4, 0.5f);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vmulq_n_f32(v0, 0.5f);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fftenergy_m, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fftenergy_s, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //venergy_m = vaddq_f32(venergy_m, v4); /* sum of fftenergy_m[0..3] is not needed */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //venergy_s = vaddq_f32(venergy_s, v0); /* sum of fftenergy_s[0..3] is not needed */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* 2nd to 128th loop : wsamp_l[*][4] to wsamp_l[*][511], wsamp_l[*][1020] to wsamp_l[*][513] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (j = 1; j < 128; j++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vld1q_f32(wsamp_l[0]+j*4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vld1q_f32(wsamp_l[1]+j*4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vrev64q_f32(vld1q_f32(wsamp_l[0]-j*4+1021));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vrev64q_f32(vld1q_f32(wsamp_l[1]-j*4+1021));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vextq_f32(v2, v2, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vextq_f32(v3, v3, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vaddq_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vaddq_f32(v2, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vsubq_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vsubq_f32(v2, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vmulq_n_f32(v4, sqrt2_half);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vmulq_n_f32(v5, sqrt2_half);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vmulq_n_f32(v0, sqrt2_half);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vmulq_n_f32(v2, sqrt2_half);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*vst1q_f32(wsamp_l[0]+j*4, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(wsamp_l[1]+j*4, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vrev64q_f32(v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vrev64q_f32(v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vextq_f32(v1, v1, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vextq_f32(v3, v3, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(wsamp_l[0]-j*4+1021, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(wsamp_l[1]-j*4+1021, v3);*/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vmulq_f32(v4, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vmulq_f32(v0, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vfmaq_f32(v4, v5, v5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vfmaq_f32(v0, v2, v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vmulq_n_f32(v4, 0.5f);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vmulq_n_f32(v0, 0.5f);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fftenergy_m+j*4, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(fftenergy_s+j*4, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ venergy_m = vaddq_f32(venergy_m, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ venergy_s = vaddq_f32(venergy_s, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* finally: wsamp_l[*][512] */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT l = wsamp_l[0][512];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT r = wsamp_l[1][512];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT m = (l + r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT s = (l - r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //wsamp_l[0][512] = m;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //wsamp_l[1][512] = s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fftenergy_m[512] = m * m;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fftenergy_s[512] = s * s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* total energy */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT totalenergy = vaddvq_f32(venergy_m);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (j = 4; j < 11; j++)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ totalenergy -= fftenergy_m[j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ psv->tot_ener[2] = totalenergy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ totalenergy = vaddvq_f32(venergy_s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (j = 4; j < 11; j++)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ totalenergy -= fftenergy_s[j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ psv->tot_ener[3] = totalenergy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (plt) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (j = 0; j < HBLKSIZE; j++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ plt->energy[gr_out][2][j] = plt->energy_save[2][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ plt->energy_save[2][j] = fftenergy_m[j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ plt->energy[gr_out][3][j] = plt->energy_save[3][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ plt->energy_save[3][j] = fftenergy_s[j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2], int chn,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int gr_out, FLOAT fftenergy[HBLKSIZE], FLOAT(*wsamp_l)[BLKSIZE])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ PsyStateVar_t *psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (chn < 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fft_long(gfc, *wsamp_l, chn, buffer);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ else if (chn == 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* FFT data for mid and side channel is derived from L & R */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (j = BLKSIZE - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT const l = wsamp_l[0][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT const r = wsamp_l[1][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wsamp_l[0][j] = (l + r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wsamp_l[1][j] = (l - r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ * compute energies
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *********************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fftenergy[0] = wsamp_l[0][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ fftenergy[0] *= fftenergy[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = BLKSIZE / 2 - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const re = (*wsamp_l)[BLKSIZE / 2 - j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const im = (*wsamp_l)[BLKSIZE / 2 + j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -712,6 +917,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -772,7 +978,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT energy[4], FLOAT sub_short_factor[4][3], int ns_attacks[4][4],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int uselongblock[2])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT ns_hpfsmpl[2][576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT ns_hpfsmpl[2][576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *const psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -793,6 +999,66 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* apply high pass filter of fs/4 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const sample_t *const firbuf = &buffer[chn][576 - 350 - NSFIRLEN + 192];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ assert(dimension_of(fircoef) == ((NSFIRLEN - 1) / 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vbuf1, vbuf2, vbuf3, vbuf4, vbuf5, vbuf6, vbuf7;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbuf1 = vld1q_f32(firbuf);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbuf2 = vld1q_f32(firbuf+4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbuf3 = vld1q_f32(firbuf+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbuf4 = vld1q_f32(firbuf+12);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbuf5 = vld1q_f32(firbuf+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbuf6 = vld1q_f32(firbuf+20);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 0; ; i += 4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vsum1, vsum2, v0, v1, v2, v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vld1q_f32(firbuf+i+10);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbuf7 = vld1q_f32(firbuf+i+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ (firbuf[0][1][2][3] + firbuf[21][22][23][24]) * fircoef[0]
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ (firbuf[1][2][3][4] + firbuf[20][21][22][23]) * fircoef[1]
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ :
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ (firbuf[8][9][10][11] + firbuf[13][14][15][16]) * fircoef[8]
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ (firbuf[9][10][11][12] + firbuf[12][13][14][15]) * fircoef[9]
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vextq_f32(vbuf6, vbuf7, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vextq_f32(vbuf1, vbuf2, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vaddq_f32(vbuf1, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vaddq_f32(v1, vbuf6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vfmaq_n_f32(vsum1, v0, fircoef[0]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vmulq_n_f32( v1, fircoef[1]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vextq_f32(vbuf1, vbuf2, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vextq_f32(vbuf5, vbuf6, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vextq_f32(vbuf1, vbuf2, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vextq_f32(vbuf5, vbuf6, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vaddq_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vaddq_f32(v2, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vfmaq_n_f32(vsum1, v0, fircoef[2]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vfmaq_n_f32(vsum2, v2, fircoef[3]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vextq_f32(vbuf5, vbuf6, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vextq_f32(vbuf2, vbuf3, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vaddq_f32(vbuf2, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vaddq_f32(v1, vbuf5);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vfmaq_n_f32(vsum1, v0, fircoef[4]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vfmaq_n_f32(vsum2, v1, fircoef[5]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vextq_f32(vbuf2, vbuf3, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vextq_f32(vbuf4, vbuf5, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vextq_f32(vbuf2, vbuf3, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vextq_f32(vbuf4, vbuf5, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vaddq_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vaddq_f32(v2, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vfmaq_n_f32(vsum1, v0, fircoef[6]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vfmaq_n_f32(vsum2, v2, fircoef[7]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vextq_f32(vbuf4, vbuf5, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vextq_f32(vbuf3, vbuf4, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vaddq_f32(vbuf3, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vaddq_f32(v1, vbuf4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vfmaq_n_f32(vsum1, v0, fircoef[8]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vfmaq_n_f32(vsum2, v1, fircoef[9]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vaddq_f32(vsum1, vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(ns_hpfsmpl[chn]+i, vsum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (i == 572) break;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbuf1 = vbuf2; vbuf2 = vbuf3; vbuf3 = vbuf4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbuf4 = vbuf5; vbuf5 = vbuf6; vbuf6 = vbuf7;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 0; i < 576; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT sum1, sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ sum1 = firbuf[i + 10];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -803,6 +1069,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ns_hpfsmpl[chn][i] = sum1 + sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ masking_ratio[gr_out][chn].en = psv->en[chn];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ masking_ratio[gr_out][chn].thm = psv->thm[chn];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (n_chn_psy > 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1423,9 +1690,9 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* fft and energy calculation */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT(*wsamp_l)[BLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT(*wsamp_s)[3][BLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT fftenergy[HBLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT fftenergy[HBLKSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT fftenergy_s[3][HBLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT wsamp_L[2][BLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT wsamp_L[2][BLKSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT wsamp_S[2][3][BLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT eb[4][CBANDS], thr[4][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1457,6 +1724,26 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* LONG BLOCK CASE */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (chn = 0; chn < cfg->channels_out; chn++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int const ch01 = chn & 0x01;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wsamp_l = wsamp_L + ch01;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbrpsy_compute_fft_l(gfc, buffer, chn, gr_out, fftenergy, wsamp_l);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbrpsy_compute_loudness_approximation_l(gfc, gr_out, chn, fftenergy);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbrpsy_compute_masking_l(gfc, fftenergy, eb[chn], thr[chn], chn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (cfg->mode == JOINT_STEREO) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT fftenergy_side[HBLKSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbrpsy_compute_fft_l_js(gfc, buffer, gr_out, fftenergy, fftenergy_side, wsamp_L);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbrpsy_compute_masking_l(gfc, fftenergy, eb[2], thr[2], 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbrpsy_compute_masking_l(gfc, fftenergy_side, eb[3], thr[3], 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if ((uselongblock[0] + uselongblock[1]) == 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vbrpsy_compute_MS_thresholds(const_eb, thr, gdl->mld_cb, gfc->ATH->cb_l,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ath_factor, cfg->msfix, gdl->npart);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (chn = 0; chn < n_chn_psy; chn++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int const ch01 = chn & 0x01;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1471,6 +1758,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ath_factor, cfg->msfix, gdl->npart);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* TODO: apply adaptive ATH masking here ?? */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (chn = 0; chn < n_chn_psy; chn++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ convert_partition2scalefac_l(gfc, eb[chn], thr[chn], chn);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/quantize.c.orig 2017-08-15 22:40:45
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/quantize.c 2023-04-12 16:11:34
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -40,6 +40,29 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifdef HAVE_XMMINTRIN_H
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "vector/lame_intrin.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <arm_neon.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__aarch64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vaddvq_f32(a) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x2_t b = vtrnq_f32(a, a); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t c = vaddq_f32(b.val[0], b.val[1]); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vget_lane_f32(vadd_f32(vget_high_f32(c), vget_low_f32(c)), 0); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vsqrtq_f32(a) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t b = vmaxq_f32(a, vreinterpretq_f32_u32(vdupq_n_u32(0x00800000))); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t e = vrsqrteq_f32(b); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(b, e), e), e); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(b, e), e), e); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmulq_f32(a, e); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vmaxnmq_f32 vmaxq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vmaxnmvq_f32(a) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x2_t b = vtrnq_f32(a, a); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t c = vmaxq_f32(b.val[0], b.val[1]); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vget_lane_f32(vmax_f32(vget_high_f32(c), vget_low_f32(c)), 0); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -72,10 +95,45 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ init_xrpow_core_c(gr_info * const cod_info, FLOAT xrpow[576], int upper, FLOAT * sum)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- int i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int i = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *sum = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- for (i = 0; i <= upper; ++i) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vsum = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vmax = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (i = 0; i <= upper - 15; i += 16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v0 = vabsq_f32(vld1q_f32(cod_info->xr+i));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v1 = vabsq_f32(vld1q_f32(cod_info->xr+i+4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v2 = vabsq_f32(vld1q_f32(cod_info->xr+i+8));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v3 = vabsq_f32(vld1q_f32(cod_info->xr+i+12));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vaddq_f32(vsum, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vaddq_f32(vsum, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vaddq_f32(vsum, v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vaddq_f32(vsum, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vsqrtq_f32(vmulq_f32(v0, vsqrtq_f32(v0)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vsqrtq_f32(vmulq_f32(v1, vsqrtq_f32(v1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vsqrtq_f32(vmulq_f32(v2, vsqrtq_f32(v2)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v3 = vsqrtq_f32(vmulq_f32(v3, vsqrtq_f32(v3)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vmaxnmq_f32(vmax, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vmaxnmq_f32(vmax, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vmaxnmq_f32(vmax, v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vmaxnmq_f32(vmax, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(xrpow+i, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(xrpow+i+4, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(xrpow+i+8, v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(xrpow+i+12, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; i <= upper - 3; i += 4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v0 = vabsq_f32(vld1q_f32(cod_info->xr+i));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vaddq_f32(vsum, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vsqrtq_f32(vmulq_f32(v0, vsqrtq_f32(v0)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vmaxnmq_f32(vmax, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vst1q_f32(xrpow+i, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ cod_info->xrpow_max = vmaxnmvq_f32(vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *sum = vaddvq_f32(vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; i <= upper; ++i) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ tmp = fabs(cod_info->xr[i]);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *sum += tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ xrpow[i] = sqrt(tmp * sqrt(tmp));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1495,7 +1553,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[2][2][SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int bands[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int frameBits[15];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int used_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1904,7 +1962,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int targ_bits[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mean_bits, max_frame_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int ch, gr, ath_over;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1991,7 +2049,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int targ_bits[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mean_bits, max_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int gr, ch;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/quantize_pvt.c.orig 2017-09-07 04:33:36
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/quantize_pvt.c 2023-04-12 19:35:58
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -36,6 +36,22 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "reservoir.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame-analysis.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <float.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <arm_neon.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__aarch64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vaddvq_f32(a) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x2_t b = vtrnq_f32(a, a); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t c = vaddq_f32(b.val[0], b.val[1]); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vget_lane_f32(vadd_f32(vget_high_f32(c), vget_low_f32(c)), 0); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vceqzq_s32(a) vceqq_s32(a, vdupq_n_s32(0))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vceqz_s32(a) vceq_s32(a, vdup_n_s32(0))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__ARM_FEATURE_FMA)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_f32 vmlaq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfma_f32 vmla_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define NSATHSCALE 100 /* Assuming dynamic range=96dB, this value should be 92 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -767,6 +783,33 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else if (j > cod_info->big_values) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vnoise = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vstep = vdupq_n_f32(step);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; l - 3 > 0; l -= 4, j += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v0 = vabsq_f32(vld1q_f32(cod_info->xr+j));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v1 = vabsq_f32(vld1q_f32(cod_info->xr+j+4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v2 = vsubq_f32(v0, vstep);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v3 = vsubq_f32(v1, vstep);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vbslq_f32(vceqzq_s32(vld1q_s32(ix+j)), v0, v2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vbslq_f32(vceqzq_s32(vld1q_s32(ix+j+4)), v1, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise = vfmaq_f32(vnoise, v0, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise = vfmaq_f32(vnoise, v1, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; l - 1 > 0; l -= 2, j += 4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v0 = vabsq_f32(vld1q_f32(cod_info->xr+j));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v1 = vsubq_f32(v0, vstep);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vbslq_f32(vceqzq_s32(vld1q_s32(ix+j)), v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise = vfmaq_f32(vnoise, v0, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; l > 0; l--, j += 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x2_t v0 = vabs_f32(vld1_f32(cod_info->xr+j));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x2_t v1 = vsub_f32(v0, vget_low_f32(vstep));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vbsl_f32(vceqz_s32(vld1_s32(ix+j)), v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise = vcombine_f32(vfma_f32(vget_low_f32(vnoise), v0, v0), vget_high_f32(vnoise));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += vaddvq_f32(vnoise);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT ix01[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix01[0] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix01[1] = step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -779,8 +822,33 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if 0
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vnoise = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; l - 3 > 0; l -= 4, j += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v0 = vabsq_f32(vld1q_f32(cod_info->xr+j));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v1 = vabsq_f32(vld1q_f32(cod_info->xr+j+4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t v2 = vld1q_s32(ix+j);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t v3 = vld1q_s32(ix+j+4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v4 = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v5 = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vld1q_lane_f32(pow43+vgetq_lane_s32(v2, 0), v4, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vld1q_lane_f32(pow43+vgetq_lane_s32(v3, 0), v5, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vld1q_lane_f32(pow43+vgetq_lane_s32(v2, 1), v4, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vld1q_lane_f32(pow43+vgetq_lane_s32(v3, 1), v5, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vld1q_lane_f32(pow43+vgetq_lane_s32(v2, 2), v4, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vld1q_lane_f32(pow43+vgetq_lane_s32(v3, 2), v5, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v4 = vld1q_lane_f32(pow43+vgetq_lane_s32(v2, 3), v4, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = vld1q_lane_f32(pow43+vgetq_lane_s32(v3, 3), v5, 3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vfmsq_n_f32(v0, v4, step);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vfmsq_n_f32(v1, v5, step);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise = vfmaq_f32(vnoise, v0, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vnoise = vfmaq_f32(vnoise, v1, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += vaddvq_f32(vnoise);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (l--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/tables.c.orig 2011-05-08 01:05:17
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/tables.c 2023-04-12 12:30:32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -240,7 +240,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 7, 7, 8, 9
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const uint8_t t7l[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const uint8_t t7l[48] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 1, 4, 7, 9, 9, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 4, 6, 8, 9, 9, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 7, 7, 9, 10, 10, 11,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -249,7 +249,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 9, 10, 11, 12, 12, 12
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const uint8_t t8l[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const uint8_t t8l[48] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 2, 4, 7, 9, 9, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 4, 4, 6, 10, 10, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 7, 6, 8, 10, 10, 11,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -258,7 +258,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 10, 10, 11, 11, 13, 13
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const uint8_t t9l[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const uint8_t t9l[48] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 3, 4, 6, 7, 9, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 4, 5, 6, 7, 8, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 5, 6, 7, 8, 9, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -267,7 +267,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 9, 9, 10, 10, 11, 11
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const uint8_t t10l[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const uint8_t t10l[] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 1, 4, 7, 9, 10, 10, 10, 11,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 4, 6, 8, 9, 10, 11, 10, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 7, 8, 9, 10, 11, 12, 11, 11,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -278,7 +278,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 10, 10, 11, 12, 12, 13, 13, 13
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const uint8_t t11l[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const uint8_t t11l[] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 2, 4, 6, 8, 9, 10, 9, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 4, 5, 6, 8, 10, 10, 9, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 6, 7, 8, 9, 10, 11, 10, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -289,7 +289,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 9, 9, 10, 11, 12, 12, 12, 12
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const uint8_t t12l[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const uint8_t t12l[] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 4, 4, 6, 8, 9, 10, 10, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 4, 5, 6, 7, 9, 9, 10, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 6, 6, 7, 8, 9, 10, 9, 10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/takehiro.c.orig 2017-09-07 04:33:36
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/takehiro.c 2023-04-12 16:32:51
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -33,6 +33,22 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "util.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "quantize_pvt.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "tables.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <arm_neon.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__aarch64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vaddvq_u32(a) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t b = vtrnq_u32(a, a); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t c = vaddq_u32(b.val[0], b.val[1]); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vget_lane_u32(vadd_u32(vget_high_u32(c), vget_low_u32(c)), 0); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vaddv_u32(a) (vget_lane_u32(vpadd_u32(a, a), 0))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vmaxvq_s32(a) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4x2_t b = vtrnq_s32(a, a); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t c = vmaxq_s32(b.val[0], b.val[1]); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vget_lane_s32(vmax_s32(vget_high_s32(c), vget_low_s32(c)), 0); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -572,7 +588,309 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from3_neon_7to9(const int *ix, const int *end, int max, unsigned int * s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum1 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum3 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen1 = ht[7].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen2 = ht[8].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen3 = ht[9].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x16x3_t vt7, vt8, vt9;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint16x8_t vsum1, vsum2, vsum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt7.val[0] = vld1q_u8(hlen1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt7.val[1] = vld1q_u8(hlen1+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt7.val[2] = vld1q_u8(hlen1+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt8.val[0] = vld1q_u8(hlen2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt8.val[1] = vld1q_u8(hlen2+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt8.val[2] = vld1q_u8(hlen2+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt9.val[0] = vld1q_u8(hlen3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt9.val[1] = vld1q_u8(hlen3+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt9.val[2] = vld1q_u8(hlen3+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vsum2 = vsum3 = vdupq_n_u16(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /*for (;ix < end - 32; ix += 32) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx1 = vld2q_u32((const unsigned int *)ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx2 = vld2q_u32((const unsigned int *)ix+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx3 = vld2q_u32((const unsigned int *)ix+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx4 = vld2q_u32((const unsigned int *)ix+24);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v0 = vmlaq_n_u32(vx1.val[1], vx1.val[0], 6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v1 = vmlaq_n_u32(vx2.val[1], vx2.val[0], 6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v2 = vmlaq_n_u32(vx3.val[1], vx3.val[0], 6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v3 = vmlaq_n_u32(vx4.val[1], vx4.val[0], 6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint16x8_t v4 = vuzp1q_u16(vreinterpretq_u16_u32(v0), vreinterpretq_u16_u32(v1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint16x8_t v5 = vuzp1q_u16(vreinterpretq_u16_u32(v2), vreinterpretq_u16_u32(v3));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x16_t v6 = vuzp1q_u8(vreinterpretq_u8_u16(v4), vreinterpretq_u8_u16(v5));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x16_t v7 = vqtbl3q_u8(vt7, v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x16_t v8 = vqtbl3q_u8(vt8, v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x16_t v9 = vqtbl3q_u8(vt9, v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vaddw_u8(vsum1, vget_low_u8(vpaddq_u8(v7, v7)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vaddw_u8(vsum2, vget_low_u8(vpaddq_u8(v8, v8)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vaddw_u8(vsum3, vget_low_u8(vpaddq_u8(v9, v9)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }*/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (;ix < end - 15; ix += 16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx1 = vld2q_u32((const unsigned int *)ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx2 = vld2q_u32((const unsigned int *)ix+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v0 = vmlaq_n_u32(vx1.val[1], vx1.val[0], 6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v1 = vmlaq_n_u32(vx2.val[1], vx2.val[0], 6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x8_t v2 = vmovn_u16(vuzp1q_u16(vreinterpretq_u16_u32(v0), vreinterpretq_u16_u32(v1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vaddw_u8(vsum1, vqtbl3_u8(vt7, v2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vaddw_u8(vsum2, vqtbl3_u8(vt8, v2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vaddw_u8(vsum3, vqtbl3_u8(vt9, v2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (;ix < end - 7; ix += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx = vld2q_u32((const unsigned int *)ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v0 = vmlaq_n_u32(vx.val[1], vx.val[0], 6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint16x4_t v1 = vmovn_u32(v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x8_t v2 = vmovn_u16(vcombine_u16(v1, v1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vaddw_u8(vsum1, vreinterpret_u8_u32(vset_lane_u32(0, vreinterpret_u32_u8(vqtbl3_u8(vt7, v2)), 1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vaddw_u8(vsum2, vreinterpret_u8_u32(vset_lane_u32(0, vreinterpret_u32_u8(vqtbl3_u8(vt8, v2)), 1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vaddw_u8(vsum3, vreinterpret_u8_u32(vset_lane_u32(0, vreinterpret_u32_u8(vqtbl3_u8(vt9, v2)), 1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += vaddlvq_u16(vsum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += vaddlvq_u16(vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += vaddlvq_u16(vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (;ix < end - 1;) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x = x0 * xlen + x1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum3) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1 + 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from3_neon_10to12(const int *ix, const int *end, int max, unsigned int * s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum1 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum3 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const unsigned int xlen = 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen1 = ht[10].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen2 = ht[11].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const uint8_t *const hlen3 = ht[12].hlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x16x4_t vt10, vt11, vt12;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint16x8_t vsum1, vsum2, vsum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt10.val[0] = vld1q_u8(hlen1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt10.val[1] = vld1q_u8(hlen1+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt10.val[2] = vld1q_u8(hlen1+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt10.val[3] = vld1q_u8(hlen1+48);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt11.val[0] = vld1q_u8(hlen2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt11.val[1] = vld1q_u8(hlen2+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt11.val[2] = vld1q_u8(hlen2+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt11.val[3] = vld1q_u8(hlen2+48);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt12.val[0] = vld1q_u8(hlen3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt12.val[1] = vld1q_u8(hlen3+16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt12.val[2] = vld1q_u8(hlen3+32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vt12.val[3] = vld1q_u8(hlen3+48);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vsum2 = vsum3 = vdupq_n_u16(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (;ix < end - 15; ix += 16) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx1 = vld2q_u32((const unsigned int *)ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx2 = vld2q_u32((const unsigned int *)ix+8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v0 = vmlaq_n_u32(vx1.val[1], vx1.val[0], 8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v1 = vmlaq_n_u32(vx2.val[1], vx2.val[0], 8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x8_t v2 = vmovn_u16(vuzp1q_u16(vreinterpretq_u16_u32(v0), vreinterpretq_u16_u32(v1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vaddw_u8(vsum1, vqtbl4_u8(vt10, v2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vaddw_u8(vsum2, vqtbl4_u8(vt11, v2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vaddw_u8(vsum3, vqtbl4_u8(vt12, v2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (;ix < end - 7; ix += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx = vld2q_u32((const unsigned int *)ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v0 = vmlaq_n_u32(vx.val[1], vx.val[0], 8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint16x4_t v1 = vmovn_u32(v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint8x8_t v2 = vmovn_u16(vcombine_u16(v1, v1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum1 = vaddw_u8(vsum1, vreinterpret_u8_u32(vset_lane_u32(0, vreinterpret_u32_u8(vqtbl4_u8(vt10, v2)), 1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum2 = vaddw_u8(vsum2, vreinterpret_u8_u32(vset_lane_u32(0, vreinterpret_u32_u8(vqtbl4_u8(vt11, v2)), 1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum3 = vaddw_u8(vsum3, vreinterpret_u8_u32(vset_lane_u32(0, vreinterpret_u32_u8(vqtbl4_u8(vt12, v2)), 1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += vaddlvq_u16(vsum1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += vaddlvq_u16(vsum2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += vaddlvq_u16(vsum3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (;ix < end - 1;) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x0 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x1 = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x = x0 * xlen + x1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 += hlen1[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 += hlen2[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 += hlen3[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum3) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1 + 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const uint32_t table131415[16 * 16] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x00030101, 0x00050505, 0x00060707, 0x00080908, 0x00080a09, 0x00090a0a, 0x000a0b0a, 0x000a0b0b,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000a0c0a, 0x000b0c0b, 0x000b0c0c, 0x000c0d0c, 0x000c0d0d, 0x000c0d0d, 0x000d0e0e, 0x000e0b0e,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x00050404, 0x00050606, 0x00070808, 0x00080909, 0x00090a0a, 0x00090b0a, 0x000a0b0b, 0x000a0b0b,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000a0c0b, 0x000b0c0b, 0x000b0c0c, 0x000c0d0c, 0x000c0e0d, 0x000c0d0e, 0x000d0e0e, 0x000d0b0e,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x00060707, 0x00070808, 0x00070909, 0x00080a0a, 0x00090b0b, 0x00090b0b, 0x000a0c0c, 0x000a0c0c,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000a0d0b, 0x000b0c0c, 0x000b0d0c, 0x000c0d0d, 0x000c0d0d, 0x000d0e0e, 0x000d0e0f, 0x000d0c0f,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x00070908, 0x00080909, 0x00080a0a, 0x00090b0b, 0x00090b0b, 0x000a0c0c, 0x000a0c0c, 0x000b0c0c,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000b0d0c, 0x000b0d0d, 0x000c0e0d, 0x000c0e0d, 0x000c0e0d, 0x000d0f0e, 0x000d0f0f, 0x000d0d0f,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x00080a09, 0x00080a09, 0x00090b0b, 0x00090b0b, 0x000a0c0c, 0x000a0c0c, 0x000b0d0d, 0x000b0d0d,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000b0d0c, 0x000b0e0d, 0x000c0e0d, 0x000c0e0e, 0x000c0f0e, 0x000d0f0f, 0x000d0f0f, 0x000d0c10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x00090a0a, 0x00090a0a, 0x00090b0b, 0x000a0b0c, 0x000a0c0c, 0x000a0d0c, 0x000b0d0d, 0x000b0e0d,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000b0d0d, 0x000b0e0d, 0x000c0e0e, 0x000c0f0d, 0x000d0f0f, 0x000d0f0f, 0x000d1010, 0x000e0d10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000a0b0a, 0x00090b0b, 0x000a0b0c, 0x000a0c0c, 0x000a0d0d, 0x000b0d0d, 0x000b0d0d, 0x000b0d0d,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000b0e0d, 0x000c0e0e, 0x000c0e0e, 0x000c0e0e, 0x000d0f0f, 0x000d0f0f, 0x000e1010, 0x000e0d10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000a0b0b, 0x000a0b0b, 0x000a0c0c, 0x000b0c0d, 0x000b0d0d, 0x000b0d0d, 0x000b0d0e, 0x000c0e0e,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000c0e0e, 0x000c0f0e, 0x000c0f0f, 0x000c0f0f, 0x000d0f0f, 0x000d1110, 0x000d1112, 0x000e0d12,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000a0b0a, 0x000a0c0a, 0x000a0c0b, 0x000b0d0c, 0x000b0d0c, 0x000b0d0d, 0x000b0e0d, 0x000c0e0e,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000c0f0e, 0x000c0f0e, 0x000c0f0e, 0x000d0f0f, 0x000d100f, 0x000e1010, 0x000e1011, 0x000e0d11,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000a0c0b, 0x000a0c0b, 0x000b0c0c, 0x000b0d0c, 0x000b0d0d, 0x000b0e0d, 0x000c0e0d, 0x000c0f0f,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000c0f0e, 0x000d0f0f, 0x000d0f0f, 0x000d1010, 0x000d0f10, 0x000e1010, 0x000e0f12, 0x000e0e11,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000b0c0b, 0x000b0d0c, 0x000b0c0c, 0x000b0d0d, 0x000c0e0d, 0x000c0e0e, 0x000c0e0e, 0x000c0e0f,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000c0f0e, 0x000d100f, 0x000d1010, 0x000d100f, 0x000d1110, 0x000e1111, 0x000f1012, 0x000e0d13,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000b0d0c, 0x000b0d0c, 0x000b0d0c, 0x000b0d0d, 0x000c0e0e, 0x000c0e0e, 0x000c0f0e, 0x000c100e,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000d100f, 0x000d100f, 0x000d100f, 0x000d1010, 0x000e1011, 0x000e0f11, 0x000e1011, 0x000f0e12,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000c0d0c, 0x000c0e0d, 0x000b0e0d, 0x000c0e0e, 0x000c0e0e, 0x000c0f0f, 0x000d0f0e, 0x000d0f0f,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000d0f10, 0x000d1110, 0x000d1011, 0x000d1011, 0x000e1011, 0x000e1012, 0x000f1212, 0x000f0e12,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000c0f0d, 0x000c0e0d, 0x000c0e0e, 0x000c0e0f, 0x000c0f0f, 0x000d0f0f, 0x000d1010, 0x000d1010,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000d1010, 0x000e1210, 0x000e1110, 0x000e1111, 0x000e1112, 0x000e1311, 0x000f1112, 0x000f0e12,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000d0e0e, 0x000d0f0e, 0x000d0d0e, 0x000d0e0f, 0x000d100f, 0x000d100f, 0x000d0f11, 0x000d1010,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000e1010, 0x000e1113, 0x000e1211, 0x000e1111, 0x000f1311, 0x000f1113, 0x000e1012, 0x000f0e12,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000d0b0d, 0x000d0b0e, 0x000d0b0f, 0x000d0c10, 0x000d0c10, 0x000d0d10, 0x000d0d11, 0x000e0d10,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0x000e0e11, 0x000e0e11, 0x000e0e12, 0x000e0e12, 0x000f0e15, 0x000f0e14, 0x000f0e15, 0x000f0c12
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++inline static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_noESC_from3_neon_13to15(const int *ix, const int *end, int max, unsigned int * s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t1 = huf_tbl_noESC[max - 1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* No ESC-words */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum1 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum3 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t vxlen = vreinterpretq_s32_s64(vdupq_n_s64(4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint16x8_t vsum = vdupq_n_u16(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (;ix < end - 3; ix += 4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t vx = vshlq_u32(vld1q_u32((const unsigned int *)ix), vxlen);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint64x2_t v0 = vpaddlq_u32(vx);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x2_t v1 = vdup_n_u32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vset_lane_u32(table131415[vgetq_lane_u64(v0, 0)], v1, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vset_lane_u32(table131415[vgetq_lane_u64(v0, 1)], v1, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vaddw_u8(vsum, vreinterpret_u8_u32(v1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (;ix < end - 1; ix += 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x2_t vx = vshl_u32(vld1_u32((const unsigned int *)ix), vget_low_s32(vxlen));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x2_t v1 = vdup_n_u32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = vset_lane_u32(table131415[vaddv_u32(vx)], v1, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vaddw_u8(vsum, vreinterpret_u8_u32(v1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint16x4_t vsums = vadd_u16(vget_low_u16(vsum), vget_high_u16(vsum));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = vget_lane_u16(vsums, 0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 = vget_lane_u16(vsums, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum3 = vget_lane_u16(vsums, 2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum1 > sum3) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum1 = sum3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t = t1 + 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_ESC_neon(const int *ix, const int *const end, int t1, const int t2, unsigned int *const s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ /* ESC-table is used */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int const linbits = ht[t1].xlen * 65536u + ht[t2].xlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int sum = 0, sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t vlimit = vdupq_n_u32(15);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t vlinbits = vdupq_n_u32(linbits);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t vsum = vdupq_n_u32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(; ix < end - 7; ix += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4x2_t vx = vld2q_u32((const unsigned int *)ix);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v0 = vcgeq_u32(vx.val[0], vlimit);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v1 = vcgeq_u32(vx.val[1], vlimit);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v2 = vminq_u32(vx.val[0], vlimit);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ uint32x4_t v3 = vminq_u32(vx.val[1], vlimit);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vaddq_u32(vsum, vandq_u32(vlinbits, v0));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vsum = vaddq_u32(vsum, vandq_u32(vlinbits, v1));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v2 = vaddq_u32(vshlq_n_u32(v2, 4), v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[v2[0]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[v2[1]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[v2[2]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[v2[3]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += vaddvq_u32(vsum);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for(; ix < end - 1;) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int x = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int y = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (x >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (y >= 15u) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ y = 15u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += linbits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x <<= 4u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ x += y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum += largetbl[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum2 = sum & 0xffffu;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum >>= 16u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (sum > sum2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ sum = sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ t1 = t2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ *s += sum;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return t1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*************************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* choose table */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*************************************************************************/
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -601,10 +919,27 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_7to9
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_7to9
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_10to12
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_10to12
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_13to15
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_13to15
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_13to15
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_13to15
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_13to15
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_13to15
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_13to15
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3_neon_13to15
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -613,6 +948,11 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ , &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++, &count_bit_noESC_from3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -621,7 +961,27 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int* s = (unsigned int*)_s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int max;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int choice, choice2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const int *ixp = ix;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t vmax = vdupq_n_s32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; ixp < end - 7; ixp += 8) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t v0 = vld1q_s32(ixp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t v1 = vld1q_s32(ixp+4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v0 = vmaxq_s32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vmaxq_s32(vmax, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; ixp < end - 3; ixp += 4) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t v0 = vld1q_s32(ixp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vmaxq_s32(vmax, v0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (; ixp < end - 1; ixp += 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x2_t v0 = vld1_s32(ixp);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vmax = vcombine_s32(vmax_s32(vget_low_s32(vmax), v0), vget_high_s32(vmax));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ max = vmaxvq_s32(vmax);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ max = ix_max(ix, end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (max <= 15) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return count_fncs[max](ix, end, max, s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -643,7 +1003,11 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ break;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return count_bit_ESC_neon(ix, end, choice, choice2, s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return count_bit_ESC(ix, end, choice, choice2, s);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/vbrquantize.c.orig 2012-02-07 22:36:35
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/vbrquantize.c 2023-04-12 19:50:11
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -33,6 +33,33 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "util.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "vbrquantize.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "quantize_pvt.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <arm_neon.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__aarch64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vaddvq_f32(a) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x2_t b = vtrnq_f32(a, a); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t c = vaddq_f32(b.val[0], b.val[1]); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ vget_lane_f32(vadd_f32(vget_high_f32(c), vget_low_f32(c)), 0); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vuzp1q_f32(a, b) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x2_t c = vuzpq_f32(a, b); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c.val[0]; \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vuzp2q_f32(a, b) ({ \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4x2_t c = vuzpq_f32(a, b); \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ c.val[1]; \
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++})
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if !defined(__ARM_FEATURE_FMA)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_f32 vmlaq_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_n_f32 vmlaq_n_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmsq_n_f32 vmlsq_n_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#elif !defined(__clang__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmaq_n_f32 vmlaq_n_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define vfmsq_n_f32 vmlsq_n_f32
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -226,7 +253,55 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int i = bw >> 2u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int const remaining = (bw & 0x03u);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__aarch64__) || defined(__arm__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t verr = vdupq_n_f32(0);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ for (;i > 1; i -= 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vxr34_1 = vmulq_n_f32(vld1q_f32(xr34), sfpow34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vxr34_2 = vmulq_n_f32(vld1q_f32(xr34+4), sfpow34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vxr_1 = vabsq_f32(vld1q_f32(xr));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vxr_2 = vabsq_f32(vld1q_f32(xr+4));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vxrn_1 = vnegq_f32(vxr_1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vxrn_2 = vnegq_f32(vxr_2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t vix_1 = vcvtq_s32_f32(vxr34_1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t vix_2 = vcvtq_s32_f32(vxr34_2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v0 = vcombine_f32(vld1_f32(pow43+vgetq_lane_s32(vix_1, 0)), vld1_f32(pow43+vgetq_lane_s32(vix_1, 1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v1 = vcombine_f32(vld1_f32(pow43+vgetq_lane_s32(vix_1, 2)), vld1_f32(pow43+vgetq_lane_s32(vix_1, 3)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v2 = vcombine_f32(vld1_f32(pow43+vgetq_lane_s32(vix_2, 0)), vld1_f32(pow43+vgetq_lane_s32(vix_2, 1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v3 = vcombine_f32(vld1_f32(pow43+vgetq_lane_s32(vix_2, 2)), vld1_f32(pow43+vgetq_lane_s32(vix_2, 3)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v4 = vuzp1q_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v5 = vuzp2q_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v6 = vuzp1q_f32(v2, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v7 = vuzp2q_f32(v2, v3);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t verr1_1 = vfmsq_n_f32(vxr_1, v4, sfpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t verr2_1 = vfmaq_n_f32(vxrn_1, v5, sfpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t verr1_2 = vfmsq_n_f32(vxr_2, v6, sfpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t verr2_2 = vfmaq_n_f32(vxrn_2, v7, sfpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ verr1_1 = vminq_f32(verr1_1, verr2_1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ verr1_2 = vminq_f32(verr1_2, verr2_2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ verr = vfmaq_f32(verr, verr1_1, verr1_1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ verr = vfmaq_f32(verr, verr1_2, verr1_2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr34 += 8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (i-- > 0) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vxr34 = vmulq_n_f32(vld1q_f32(xr34), sfpow34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vxr = vabsq_f32(vld1q_f32(xr));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t vxrn = vnegq_f32(vxr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int32x4_t vix = vcvtq_s32_f32(vxr34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v0 = vcombine_f32(vld1_f32(pow43+vgetq_lane_s32(vix, 0)), vld1_f32(pow43+vgetq_lane_s32(vix, 1)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v1 = vcombine_f32(vld1_f32(pow43+vgetq_lane_s32(vix, 2)), vld1_f32(pow43+vgetq_lane_s32(vix, 3)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v2 = vuzp1q_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t v3 = vuzp2q_f32(v0, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t verr1 = vfmsq_n_f32(vxr, v2, sfpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float32x4_t verr2 = vfmaq_n_f32(vxrn, v3, sfpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ verr1 = vminq_f32(verr1, verr2);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ verr = vfmaq_f32(verr, verr1, verr1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xr34 += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ xfsf += vaddvq_f32(verr);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ while (i-- > 0) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[0] = sfpow34 * xr34[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[1] = sfpow34 * xr34[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[2] = sfpow34 * xr34[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -243,6 +318,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ xr += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ xr34 += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (remaining) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[0] = x[1] = x[2] = x[3] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ switch( remaining ) {
</span><span style='display:block; white-space:pre;color:#808080;'>diff --git a/audio/lame/files/lame-3.100-sse-20171014.diff b/audio/lame/files/lame-3.100-sse-20171014.diff
</span>new file mode 100644
<span style='display:block; white-space:pre;color:#808080;'>index 00000000000..3aa278051a7
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>--- /dev/null
</span><span style='display:block; white-space:pre;background:#e0e0ff;'>+++ b/audio/lame/files/lame-3.100-sse-20171014.diff
</span><span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -0,0 +1,1831 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+# https://tmkk.undo.jp/lame/lame-3.100-sse-20171014.diff
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/fft.c.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/fft.c 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -331,7 +331,7 @@ init_fft(lame_internal_flags * const gfc
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifdef HAVE_XMMINTRIN_H
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-#ifdef MIN_ARCH_SSE
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(MIN_ARCH_SSE) || defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->fft_fht = fht_SSE2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/gain_analysis.c.orig 2017-10-11 04:08:39.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/gain_analysis.c 2017-10-14 12:06:19.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -95,6 +95,9 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <stdio.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <stdlib.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <string.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <xmmintrin.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -109,6 +112,67 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*lint -save -e736 loss of precision */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const Float_t ABYule[9][2 * YULE_ORDER + 1 + 3] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.03857599435200, -3.84664617118067, -0.02160367184185, 7.81501653005538, -0.00123395316851,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -11.34170355132042, -0.00009291677959, 13.05504219327545, -0.01655260341619,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -12.28759895145294, 0.02161526843274, 9.48293806319790, -0.02074045215285, -5.87257861775999,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.00594298065125, 2.75465861874613, 0.00306428023191, -0.86984376593551, 0.00012025322027,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.13919314567432, 0.00288463683916, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.05418656406430, -3.47845948550071, -0.02911007808948, 6.36317777566148, -0.00848709379851,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -8.54751527471874, -0.00851165645469, 9.47693607801280, -0.00834990904936, -8.81498681370155,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.02245293253339, 6.85401540936998, -0.02596338512915, -4.39470996079559, 0.01624864962975,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 2.19611684890774, -0.00240879051584, -0.75104302451432, 0.00674613682247, 0.13149317958808,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00187763777362, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.15457299681924, -2.37898834973084, -0.09331049056315, 2.84868151156327, -0.06247880153653,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -2.64577170229825, 0.02163541888798, 2.23697657451713, -0.05588393329856, -1.67148153367602,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.04781476674921, 1.00595954808547, 0.00222312597743, -0.45953458054983, 0.03174092540049,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.16378164858596, -0.01390589421898, -0.05032077717131, 0.00651420667831, 0.02347897407020,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00881362733839, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.30296907319327, -1.61273165137247, -0.22613988682123, 1.07977492259970, -0.08587323730772,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.25656257754070, 0.03282930172664, -0.16276719120440, -0.00915702933434, -0.22638893773906,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02364141202522, 0.39120800788284, -0.00584456039913, -0.22138138954925, 0.06276101321749,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.04500235387352, -0.00000828086748, 0.02005851806501, 0.00205861885564, 0.00302439095741,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02950134983287, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.33642304856132, -1.49858979367799, -0.25572241425570, 0.87350271418188, -0.11828570177555,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.12205022308084, 0.11921148675203, -0.80774944671438, -0.07834489609479, 0.47854794562326,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00469977914380, -0.12453458140019, -0.00589500224440, -0.04067510197014, 0.05724228140351,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.08333755284107, 0.00832043980773, -0.04237348025746, -0.01635381384540, 0.02977207319925,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.01760176568150, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.44915256608450, -0.62820619233671, -0.14351757464547, 0.29661783706366, -0.22784394429749,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.37256372942400, -0.01419140100551, 0.00213767857124, 0.04078262797139, -0.42029820170918,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.12398163381748, 0.22199650564824, 0.04097565135648, 0.00613424350682, 0.10478503600251,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.06747620744683, -0.01863887810927, 0.05784820375801, -0.03193428438915, 0.03222754072173,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.00541907748707, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.56619470757641, -1.04800335126349, -0.75464456939302, 0.29156311971249, 0.16242137742230,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.26806001042947, 0.16744243493672, 0.00819999645858, -0.18901604199609, 0.45054734505008,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.30931782841830, -0.33032403314006, -0.27562961986224, 0.06739368333110, 0.00647310677246,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.04784254229033, 0.08647503780351, 0.01639907836189, -0.03788984554840, 0.01807364323573,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00588215443421, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.58100494960553, -0.51035327095184, -0.53174909058578, -0.31863563325245, -0.14289799034253,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.20256413484477, 0.17520704835522, 0.14728154134330, 0.02377945217615, 0.38952639978999,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.15558449135573, -0.23313271880868, -0.25344790059353, -0.05246019024463, 0.01628462406333,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02505961724053, 0.06920467763959, 0.02442357316099, -0.03721611395801, 0.01818801111503,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.00749618797172, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.53648789255105, -0.25049871956020, -0.42163034350696, -0.43193942311114, -0.00275953611929,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.03424681017675, 0.04267842219415, -0.04678328784242, -0.10214864179676, 0.26408300200955,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ 0.14590772289388, 0.15113130533216, -0.02459864859345, -0.17556493366449, -0.11202315195388,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.18823009262115, -0.04060034127000, 0.05477720428674, 0.04788665548180, 0.04704409688120,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -0.02217936801134, 0.0, 0.0, 0.0}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const Float_t ABButter[9][2 * BUTTER_ORDER + 1 + 3] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.98621192462708, -1.97223372919527, -1.97242384925416, 0.97261396931306, 0.98621192462708, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.98500175787242, -1.96977855582618, -1.97000351574484, 0.97022847566350, 0.98500175787242, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97938932735214, -1.95835380975398, -1.95877865470428, 0.95920349965459, 0.97938932735214, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97531843204928, -1.95002759149878, -1.95063686409857, 0.95124613669835, 0.97531843204928, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.97316523498161, -1.94561023566527, -1.94633046996323, 0.94705070426118, 0.97316523498161, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.96454515552826, -1.92783286977036, -1.92909031105652, 0.93034775234268, 0.96454515552826, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.96009142950541, -1.91858953033784, -1.92018285901082, 0.92177618768381, 0.96009142950541, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.95856916599601, -1.91542108074780, -1.91713833199203, 0.91885558323625, 0.95856916599601, 0.0, 0.0, 0.0},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ {0.94597685600279, -1.88903307939452, -1.89195371200558, 0.89487434461664, 0.94597685600279, 0.0, 0.0, 0.0}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const Float_t ABYule[9][multiple_of(4, 2 * YULE_ORDER + 1)] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* 20 18 16 14 12 10 8 6 4 2 0 19 17 15 13 11 9 7 5 3 1 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ { 0.00288463683916, 0.00012025322027, 0.00306428023191, 0.00594298065125, -0.02074045215285, 0.02161526843274, -0.01655260341619, -0.00009291677959, -0.00123395316851, -0.02160367184185, 0.03857599435200, 0.13919314567432, -0.86984376593551, 2.75465861874613, -5.87257861775999, 9.48293806319790,-12.28759895145294, 13.05504219327545,-11.34170355132042, 7.81501653005538, -3.84664617118067},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -133,7 +197,8 @@ static const Float_t ABButter[9][multipl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0.96009142950541, 0.92177618768381, -1.92018285901082, -1.91858953033784, 0.96009142950541},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0.95856916599601, 0.91885558323625, -1.91713833199203, -1.91542108074780, 0.95856916599601},
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {0.94597685600279, 0.89487434461664, -1.89195371200558, -1.88903307939452, 0.94597685600279}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*lint -restore */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -143,6 +208,128 @@ static const Float_t ABButter[9][multipl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* When calling this procedure, make sure that ip[-order] and op[-order] point to real data! */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++filterYule(const Float_t * input, Float_t * output, size_t nSamples, const Float_t * const kernel)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5, v6, v7, v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -12(%8), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -28(%8), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps -36(%8), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -16(%9), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -32(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps -40(%9), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %3, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %4, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %3, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %4, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %6, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %7, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %2, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss -40(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jmp 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %2, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %7, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %7, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %1, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%8), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %5, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x42, %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %4, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps (%11), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps 16(%11), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps 32(%11), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %7, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps 48(%11), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps 64(%11), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps 80(%11), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %3, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE3__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "hsubps %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x01, %5, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %5, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss %4, (%9) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "add $4, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "add $4, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "dec %10 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5), "=x" (v6), "=x" (v7), "=x" (v8),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (input), "+r" (output), "+r" (nSamples)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (kernel)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++filterButter(const Float_t * input, Float_t * output, size_t nSamples, const Float_t * const kernel)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps -4(%5), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps -8(%6), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x4e, %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss -8(%5), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jmp 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%5), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x42, %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps (%8), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps 16(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE3__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "hsubps %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x01, %2, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss %1, (%6) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "add $4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "add $4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "dec %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5), "+r" (input), "+r" (output), "+r" (nSamples)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (kernel)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ filterYule(const Float_t * input, Float_t * output, size_t nSamples, const Float_t * const kernel)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -188,6 +375,7 @@ filterButter(const Float_t * input, Floa
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ++input;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/l3side.h.orig 2012-02-07 22:36:35.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/l3side.h 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -46,7 +46,7 @@ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT xr[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- int l3_enc[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int l3_enc[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int scalefac[SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT xrpow_max;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -84,7 +84,7 @@ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } gr_info;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ typedef struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- gr_info tt[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gr_info tt[2][2] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int main_data_begin;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int private_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int resvDrain_pre;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/lame.c.orig 2017-10-11 04:08:39.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/lame.c 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -2364,7 +2364,7 @@ lame_init_internal_flags(lame_internal_f
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->ov_rpg.noclipGainChange = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->ov_rpg.noclipScale = -1.0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- gfc->ATH = lame_calloc(ATH_t, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gfc->ATH = calloc_aligned16(1, sizeof(ATH_t));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (NULL == gfc->ATH)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return -2; /* maybe error codes should be enumerated in lame.h ?? */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -2455,7 +2455,7 @@ lame_init_old(lame_global_flags * gfp)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfp->report.errorf = &lame_report_def;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfp->report.msgf = &lame_report_def;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- gfp->internal_flags = lame_calloc(lame_internal_flags, 1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ gfp->internal_flags = calloc_aligned16(1, sizeof(lame_internal_flags));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (lame_init_internal_flags(gfp->internal_flags) < 0) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ freegfc(gfp->internal_flags);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/newmdct.c.orig 2011-05-08 01:05:17.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/newmdct.c 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -36,10 +36,13 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "util.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "newmdct.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <xmmintrin.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifndef USE_GOGO_SUBBAND
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-static const FLOAT enwindow[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static const FLOAT enwindow[] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -4.77e-07 * 0.740951125354959 / 2.384e-06, 1.03951e-04 * 0.740951125354959 / 2.384e-06,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 9.53674e-04 * 0.740951125354959 / 2.384e-06, 2.841473e-03 * 0.740951125354959 / 2.384e-06,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ 3.5758972e-02 * 0.740951125354959 / 2.384e-06, 3.401756e-03 * 0.740951125354959 / 2.384e-06, 9.83715e-04 * 0.740951125354959 / 2.384e-06, 9.9182e-05 * 0.740951125354959 / 2.384e-06, /* 15 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -435,6 +438,241 @@ window_subband(const sample_t * x1, FLOA
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const sample_t *x2 = &x1[238 - 14 - 286];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5, v6, v7, v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i=4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps -40(%12), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 32(%12), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 104(%12), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 176(%12), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %1, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %4, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %5, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 884(%9), %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -896(%10), %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %6, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 628(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -640(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 372(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -384(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %1, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %1, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 116(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -128(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps -24(%12), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 48(%12), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 120(%12), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 192(%12), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %1, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %4, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %5, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -140(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 128(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -396(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 384(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -652(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 640(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %1, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %1, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -908(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 896(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps -8(%12), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 64(%12), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 136(%12), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 208(%12), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %1, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %4, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %5, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -1036(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 1024(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %5, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -780(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 768(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %5, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -524(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 512(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %1, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %1, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %5, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -268(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 256(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %5, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 8(%12), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 80(%12), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 152(%12), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 224(%12), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %1, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %4, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %5, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -12(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %5, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 244(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -256(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %5, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 500(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -512(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %1, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %1, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %5, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 756(%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups -768(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %5, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps 24(%12), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps 96(%12), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps 168(%12), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps 240(%12), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %6, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %7, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %7, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups %6, (%11) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups %5, 16(%11) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subq $16, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %10 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $288, %12 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $32, %11 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $4, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subq $4, %10 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subq $128, %11 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subl $16, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %10 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $288, %12 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $32, %11 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $4, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subl $4, %10 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subl $128, %11 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5), "=x" (v6), "=x" (v7), "=x" (v8),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (i), "+r" (x1), "+r" (x2), "+r" (a)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (wp)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "memory"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ wp = enwindow + 280;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = -15; i < 0; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT w, s, t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -501,6 +739,7 @@ window_subband(const sample_t * x1, FLOA
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x1--;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x2++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT s, t, u, v;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ t = x1[-16] * wp[-10];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/psymodel.c.orig 2017-09-07 04:38:23.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/psymodel.c 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -155,6 +155,9 @@ blocktype_d[2] block type to use
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "fft.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame-analysis.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <xmmintrin.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define NSFIRLEN 21
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -218,10 +221,58 @@ psycho_loudness_approx(FLOAT const *ener
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int i;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT loudness_power;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5, v6, v7, v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i = 32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %2, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %3, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%9), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%9), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 32(%9), %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 48(%9), %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps (%10), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps 16(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps 32(%10), %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps 48(%10), %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %6, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %7, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $64, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $64, %10 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $64, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $64, %10 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE3__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "haddps %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x01, %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5), "=x" (v6), "=x" (v7), "=x" (v8),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (i), "+r" (eql_w), "+r" (energy)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ _mm_store_ss(&loudness_power, v1);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ loudness_power = 0.0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* apply weights to power in freq. bands */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 0; i < BLKSIZE / 2; ++i)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ loudness_power += energy[i] * eql_w[i];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ loudness_power *= VO_SCALE;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return loudness_power;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -666,6 +717,9 @@ static void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2], int chn,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int gr_out, FLOAT fftenergy[HBLKSIZE], FLOAT(*wsamp_l)[BLKSIZE])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5, v6, v7;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -676,6 +730,47 @@ vbrpsy_compute_fft_l(lame_internal_flags
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else if (chn == 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const sqrt2_half = SQRT2 * 0.5f;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT *wsamp_lp1 = *wsamp_l;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT *wsamp_lp2 = *wsamp_l+1024;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j = 128;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10), %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %6, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%8), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%8), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%9), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%9), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %3, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %2, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %6, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %6, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %6, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, (%8) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, 16(%8) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %4, 0(%9) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, 16(%9) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $32, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $32, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $32, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $32, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5), "=x" (v6), "=x" (v7),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (j), "+r" (wsamp_lp1), "+r" (wsamp_lp2)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (&sqrt2_half)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "memory"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* FFT data for mid and side channel is derived from L & R */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = BLKSIZE - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const l = wsamp_l[0][j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -683,6 +778,7 @@ vbrpsy_compute_fft_l(lame_internal_flags
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_l[0][j] = (l + r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ wsamp_l[1][j] = (l - r) * sqrt2_half;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -691,6 +787,73 @@ vbrpsy_compute_fft_l(lame_internal_flags
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[0] = wsamp_l[0][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ fftenergy[0] *= fftenergy[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT *wsamp_lp1 = *wsamp_l+1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT *wsamp_lp2 = *wsamp_l+1020;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT *fftenergyp = fftenergy+1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j = 64;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpeqd %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrld $26, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pslld $24, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %5, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%7), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 16(%7), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%8), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps -16(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %2, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %3, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %2, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %3, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %4, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %4, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups %0, (%9) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups %1, 16(%9) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $32, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subq $32, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $32, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subq $2048, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $32, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subl $32, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $32, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subl $2048, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%9), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 16(%9), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %2, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps 32(%9), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %5, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE3__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "haddps %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x01, %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss %0, (%10) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5), "=x" (v6),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (j), "+r" (wsamp_lp1), "+r" (wsamp_lp2), "+r" (fftenergyp)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (&psv->tot_ener[chn])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "memory"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = BLKSIZE / 2 - 1; j >= 0; --j) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const re = (*wsamp_l)[BLKSIZE / 2 - j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT const im = (*wsamp_l)[BLKSIZE / 2 + j];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -704,6 +867,7 @@ vbrpsy_compute_fft_l(lame_internal_flags
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ psv->tot_ener[chn] = totalenergy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (plt) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (j = 0; j < HBLKSIZE; j++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -772,7 +936,7 @@ vbrpsy_attack_detection(lame_internal_fl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT energy[4], FLOAT sub_short_factor[4][3], int ns_attacks[4][4],
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int uselongblock[2])
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT ns_hpfsmpl[2][576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT ns_hpfsmpl[2][576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ PsyStateVar_t *const psv = &gfc->sv_psy;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -785,14 +949,170 @@ vbrpsy_attack_detection(lame_internal_fl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Don't copy the input buffer into a temporary buffer */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* unroll the loop 2 times */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (chn = 0; chn < n_chn_out; chn++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- static const FLOAT fircoef[] = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ static const FLOAT fircoef[] __attribute__ ((aligned (16))) = {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- -5.52212e-17 * 2, -0.313819 * 2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ -5.52212e-17 * 2, -0.313819 * 2, 0, 0
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* apply high pass filter of fs/4 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const sample_t *const firbuf = &buffer[chn][576 - 350 - NSFIRLEN + 192];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- assert(dimension_of(fircoef) == ((NSFIRLEN - 1) / 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ //assert(dimension_of(fircoef) == ((NSFIRLEN - 1) / 2));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5, v6, v7;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float *firbufp = (float *)firbuf;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ float *ns_hpfsmplp = &ns_hpfsmpl[chn][0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ i = 144;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 40(%8), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpcklps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "unpckhps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%10), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 72(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 16(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 56(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 32(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 32(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 40(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %2, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %2, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $4, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $4, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%10), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 72(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 16(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 56(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 32(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 32(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 40(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %2, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %2, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %6, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $4, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $4, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%10), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 72(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 16(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 56(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 32(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 32(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 40(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %2, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %2, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $4, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $4, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%10), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 72(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 16(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 56(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 32(%10), %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 32(%8), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups 40(%8), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x1b, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %5, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %2, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %2, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %6, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $4, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $4, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE3__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "haddps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x31, %2, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x31, %3, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %3, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x88, %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, (%9) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5), "=x" (v6), "=x" (v7),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (i), "+r" (firbufp), "+r" (ns_hpfsmplp)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (fircoef)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "memory"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (i = 0; i < 576; i++) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT sum1, sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ sum1 = firbuf[i + 10];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -803,6 +1123,7 @@ vbrpsy_attack_detection(lame_internal_fl
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ns_hpfsmpl[chn][i] = sum1 + sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ masking_ratio[gr_out][chn].en = psv->en[chn];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ masking_ratio[gr_out][chn].thm = psv->thm[chn];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (n_chn_psy > 2) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1423,10 +1744,10 @@ L3psycho_anal_vbr(lame_internal_flags *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* fft and energy calculation */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT(*wsamp_l)[BLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT(*wsamp_s)[3][BLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT fftenergy[HBLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT fftenergy_s[3][HBLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT wsamp_L[2][BLKSIZE];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT wsamp_S[2][3][BLKSIZE_s];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT fftenergy[HBLKSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT fftenergy_s[3][HBLKSIZE_s] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT wsamp_L[2][BLKSIZE] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT wsamp_S[2][3][BLKSIZE_s] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT eb[4][CBANDS], thr[4][CBANDS];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT sub_short_factor[4][3];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/quantize.c.orig 2017-08-15 22:40:45.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/quantize.c 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -99,7 +99,7 @@ init_xrpow_core_init(lame_internal_flags
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->init_xrpow_core = init_xrpow_core_sse;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #ifndef HAVE_NASM
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-#ifdef MIN_ARCH_SSE
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(MIN_ARCH_SSE) || defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->init_xrpow_core = init_xrpow_core_sse;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1495,7 +1495,7 @@ VBR_old_iteration_loop(lame_internal_fla
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[2][2][SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int bands[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int frameBits[15];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int used_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1904,7 +1904,7 @@ ABR_iteration_loop(lame_internal_flags *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ EncResult_t *const eov = &gfc->ov_enc;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int targ_bits[2][2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mean_bits, max_frame_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int ch, gr, ath_over;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -1991,7 +1991,7 @@ CBR_iteration_loop(lame_internal_flags *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ SessionConfig_t const *const cfg = &gfc->cfg;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT l3_xmin[SFBMAX];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT xrpow[576];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT xrpow[576] __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int targ_bits[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mean_bits, max_bits;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int gr, ch;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/quantize_pvt.c.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/quantize_pvt.c 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -27,6 +27,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -37,6 +38,9 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame-analysis.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include <float.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <xmmintrin.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #define NSATHSCALE 100 /* Assuming dynamic range=96dB, this value should be 92 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -767,6 +771,70 @@ calc_noise_core_c(const gr_info * const
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else if (j > cod_info->big_values) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int remaining = l & 1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ l = l >> 1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const int *ixp = ix+j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const FLOAT *xrp = cod_info->xr+j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j += 4*l;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = _mm_set_ss(step);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %3, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "testl %5, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jz 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpeqd %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrld $1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pxor %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%6), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpeqd %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pandn %4, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%7), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %3, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE3__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "haddps %3, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %3, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x01, %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "+x" (v5),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (l), "+r" (ixp), "+r" (xrp)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ _mm_store_ss(&noise, v4);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (remaining) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT ix01[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix01[0] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ ix01[1] = step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - ix01[ix[j]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - ix01[ix[j]];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT ix01[2];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix01[0] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix01[1] = step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -779,8 +847,95 @@ calc_noise_core_c(const gr_info * const
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5, v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int remaining = l & 1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ l = l >> 1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(_WIN64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long long tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const int *ixp = ix+j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ const FLOAT *xrp = cod_info->xr+j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j += 4*l;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v5 = _mm_set_ss(step);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %5, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "testl %6, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jz 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movslq (%7), %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movslq 4(%7), %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movslq 8(%7), %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movslq 12(%7), %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movl (%7), %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movl 4(%7), %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movl 8(%7), %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movl 12(%7), %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %3, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x88, %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %4, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%8), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "maxps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %5, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE3__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "haddps %5, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %5, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x01, %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "+x" (v5), "=x" (v6),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (l), "+r" (ixp), "+r" (xrp), "=&r" (tmp)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (pow43)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ _mm_store_ss(&noise, v6);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if (remaining) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (l--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ temp = fabs(cod_info->xr[j]) - pow43[ix[j]] * step;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -790,6 +945,7 @@ calc_noise_core_c(const gr_info * const
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ j++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ noise += temp * temp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *startline = j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/takehiro.c.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/takehiro.c 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -26,6 +26,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -34,6 +35,9 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "quantize_pvt.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "tables.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <xmmintrin.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static const struct {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ const int region0_count;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -229,6 +233,57 @@ quantize_lines_xrpow(unsigned int l, FLO
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ l = l >> 1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ remaining = l % 2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ l = l >> 1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5, v6;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = _mm_set_ss(istep);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(_WIN64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long long tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "testl %6, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jz 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %5, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%7), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %5, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cvttss2si %0, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0xe5, %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cvttss2si %1, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cvttss2si %1, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x55, %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cvttss2si %1, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%10,%9,4), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlhps %3, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x88, %4, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cvttps2dq %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups %0, (%8) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5), "+x" (v6),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (l), "+r" (xr), "+r" (ix), "=&r" (tmp)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (adj43)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "memory"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (l--) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT x0, x1, x2, x3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int rx0, rx1, rx2, rx3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -250,6 +305,7 @@ quantize_lines_xrpow(unsigned int l, FLO
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ XRPOW_FTOI(x2, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ XRPOW_FTOI(x3, *ix++);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ };
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (remaining) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT x0, x1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int rx0, rx1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -423,6 +479,80 @@ quantize_xrpow(const FLOAT * xp, int *pi
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix_max(const int *ix, const int *end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int max;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pxor %2, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subq $8, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subl $8, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "je 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%3), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE4_1__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pmaxud %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpgtd %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pand %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pandn %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "por %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %4, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jl 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jne 3f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movq (%3), %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE4_1__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pmaxud %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpgtd %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pand %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pandn %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "por %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "3: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE4_1__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrldq $8, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pmaxud %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrldq $4, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pmaxud %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrldq $8, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpgtd %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pand %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pandn %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "por %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrldq $4, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpgtd %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pand %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pandn %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "por %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movd %0, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (ix), "+r" (end), "=r" (max)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return max;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int max1 = 0, max2 = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -437,6 +567,7 @@ ix_max(const int *ix, const int *end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (max1 < max2)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ max1 = max2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return max1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -447,12 +578,74 @@ ix_max(const int *ix, const int *end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-count_bit_ESC(const int *ix, const int *const end, int t1, const int t2, unsigned int *const s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++count_bit_ESC(const int *ix, const int *end, int t1, const int t2, unsigned int *const s)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* ESC-table is used */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int const linbits = ht[t1].xlen * 65536u + ht[t2].xlen;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int sum = 0, sum2;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ unsigned int tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ static short mult[8] __attribute__ ((aligned (16))) = {16, 1, 16, 1, 16, 1, 16, 1};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%6), %%xmm4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpeqd %%xmm2, %%xmm2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %%xmm2, %%xmm3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrlw $13, %%xmm2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psllw $4, %%xmm3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psllw $1, %%xmm2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pxor %%xmm5, %%xmm5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subq $8, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %3, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "je 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%0), %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "packssdw %%xmm0, %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %%xmm0, %%xmm1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "paddusw %%xmm3, %%xmm1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpgtw %%xmm2, %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psubw %%xmm0, %%xmm5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pmaddwd %%xmm4, %%xmm1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movd %%xmm1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrlq $32, %%xmm1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cltq \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl 1088(%5,%%rax,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movd %%xmm1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cltq \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl 1088(%5,%%rax,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %3, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jl 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %%xmm5, %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrlq $32, %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "paddw %%xmm0, %%xmm5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jne 3f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movq (%0), %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "packssdw %%xmm0, %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %%xmm0, %%xmm1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "paddusw %%xmm3, %%xmm1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpgtw %%xmm2, %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psubw %%xmm0, %%xmm5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pmaddwd %%xmm4, %%xmm1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movd %%xmm1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cltq \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl 1088(%5,%%rax,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "3: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movdqa %%xmm5, %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrld $16, %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "paddw %%xmm5, %%xmm0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pextrw $0, %%xmm0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "imull %4, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "+r" (ix), "+r" (sum), "=&a" (tmp), "+r" (end)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (linbits), "r" (largetbl), "r" (mult)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ do {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int x = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int y = *ix++;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -469,6 +662,7 @@ count_bit_ESC(const int *ix, const int *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x += y;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ sum += largetbl[x];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } while (ix < end);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ sum2 = sum & 0xffffu;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ sum >>= 16u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -790,10 +984,178 @@ count_bits(lame_internal_flags const *co
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ j += width;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(_WIN64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long long k = j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long k = j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j += width;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(_WIN64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long long l = j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long l = j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = _mm_set_ss(roundfac);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "testq $0x3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jz 7f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "je 6f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "8: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%7,%5,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltss %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%8,%5,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss %1, (%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "incq %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "testq $0x3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jz 7f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jne 8b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "7: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subq $8, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jg 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%7,%5,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%7,%5,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltps %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%8,%5,4), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%8,%5,4), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %3, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %4, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, (%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %2, 16(%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $8, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jle 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $8, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "je 6f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subq $4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jg 4f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "3: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%7,%5,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%8,%5,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, (%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jle 3b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "4: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "je 6f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "5: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%7,%5,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltss %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%8,%5,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss %1, (%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "incq %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpq %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jne 5b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "6: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "+x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (k), "+r" (l)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (xr), "r" (ix)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int k = j;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ j += width;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v1 = _mm_set_ss(roundfac);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "testl $0x3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jz 7f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "je 6f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "8: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%7,%5,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltss %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%8,%5,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss %1, (%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "incl %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "testl $0x3, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jz 7f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jne 8b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "7: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subl $8, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jg 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%7,%5,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%7,%5,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltps %0, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%8,%5,4), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps 16(%8,%5,4), %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %3, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %4, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, (%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %2, 16(%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $8, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jle 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $8, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "je 6f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subl $4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jg 4f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "3: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%7,%5,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps (%8,%5,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, (%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $4, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jle 3b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "4: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $4, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "je 6f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "5: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%7,%5,4), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpnltss %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss (%8,%5,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movss %1, (%8,%5,4) \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "incl %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpl %6, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jne 5b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "6: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "+x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (k), "+r" (j)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (xr), "r" (ix)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int k;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ for (k = j, j += width; k < j; ++k) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ix[k] = (xr[k] >= roundfac) ? ix[k] : 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/util.c.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/util.c 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -140,7 +140,7 @@ freegfc(lame_internal_flags * const gfc)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ gfc->VBR_seek_table.size = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (gfc->ATH) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- free(gfc->ATH);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ free_aligned16(gfc->ATH);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (gfc->sv_rpg.rgdata) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ free(gfc->sv_rpg.rgdata);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -162,7 +162,7 @@ freegfc(lame_internal_flags * const gfc)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ free_global_data(gfc);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- free(gfc);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ free_aligned16(gfc);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/util.h.orig 2017-09-07 04:33:36.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/util.h 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -116,6 +116,36 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ typedef struct plotting_data plotting_data;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__APPLE__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define malloc_aligned16(size) malloc(size)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define calloc_aligned16(n, size) calloc(n, size)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define free_aligned16(ptr) free(ptr)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#elif defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define malloc_aligned16(size) _aligned_malloc(size, 16)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline void * calloc_aligned16(size_t n, size_t size)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ void *ptr = _aligned_malloc(n * size, 16);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(ptr) memset(ptr, 0, n * size);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return ptr;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define free_aligned16(ptr) _aligned_free(ptr)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline void * malloc_aligned16(size_t size)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ void *ptr;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int ret = posix_memalign(&ptr, 16, size);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return ret == 0 ? ptr : NULL;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++static inline void * calloc_aligned16(size_t n, size_t size)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ void *ptr;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ int ret = posix_memalign(&ptr, 16, n * size);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ if(!ret) memset(ptr, 0, n * size);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ return ret == 0 ? ptr : NULL;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#define free_aligned16(ptr) free(ptr)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /***********************************************************************
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Global Type Definitions
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -178,7 +208,7 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT psfb12[PSFB12]; /* ATH for partitionned sfb12 in short blocks */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT cb_l[CBANDS]; /* ATH for long block convolution bands */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ FLOAT cb_s[CBANDS]; /* ATH for short block convolution bands */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- FLOAT eql_w[BLKSIZE / 2]; /* equal loudness weights (based on ATH) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ FLOAT eql_w[BLKSIZE / 2] __attribute__ ((aligned (16))); /* equal loudness weights (based on ATH) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } ATH_t;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /**
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -492,7 +522,7 @@ extern "C" {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* variables used by lame.c */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ Bit_stream_struc bs;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+- III_side_info_t l3_side;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ III_side_info_t l3_side __attribute__ ((aligned (16)));
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ scalefac_struct scalefac_band;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+--- libmp3lame/vbrquantize.c.orig 2012-02-07 22:36:35.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++++ libmp3lame/vbrquantize.c 2017-10-14 12:03:44.000000000 +0900
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -26,6 +26,7 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ # include <config.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#undef TAKEHIRO_IEEE754_HACK
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "lame.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "machine.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -34,7 +35,9 @@
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "vbrquantize.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ #include "quantize_pvt.h"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+-
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#include <xmmintrin.h>
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ struct algo_s;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -226,6 +229,81 @@ calc_sfb_noise_x34(const FLOAT * xr, con
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int i = bw >> 2u;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ unsigned int const remaining = (bw & 0x03u);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __m128 v1, v2, v3, v4, v5, v6, v7, v8;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(_WIN64)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long long tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ long tmp;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v6 = _mm_set_ss(sfpow34);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ v7 = _mm_set_ss(sfpow);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ __asm__ __volatile__ (
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "xorps %7, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "testl %8, %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jz 2f \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %5, %5 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x00, %6, %6 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "pcmpeqd %4, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "psrld $1, %4 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "1: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%10), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %5, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cvttss2si %1, %11 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0xe5, %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps (%12,%11,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cvttss2si %1, %11 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhps (%12,%11,4), %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cvttss2si %1, %11 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x55, %1, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movlps (%12,%11,4), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cvttss2si %1, %11 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movups (%9), %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhps (%12,%11,4), %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %6, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %6, %3 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %2, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x88, %3, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0xdd, %3, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %4, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %1, %2 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "subps %0, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "cmpltps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "andnps %2, %1 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "orps %1, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "mulps %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__x86_64__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addq $16, %10 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %9 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addl $16, %10 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "decl %8 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "jnz 1b \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movhlps %7, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#if defined(__SSE3__)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "haddps %7, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "movaps %7, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "shufps $0x01, %0, %0 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "addps %0, %7 \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "2: \n\t"
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "=x" (v1), "=x" (v2), "=x" (v3), "=x" (v4), "=x" (v5), "+x" (v6), "+x" (v7), "=x" (v8),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ "+r" (i), "+r" (xr), "+r" (xr34), "=&r" (tmp)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ : "r" (pow43)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ );
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++ _mm_store_ss(&xfsf, v8);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#else
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ while (i-- > 0) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[0] = sfpow34 * xr34[0];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[1] = sfpow34 * xr34[1];
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+@@ -243,6 +321,7 @@ calc_sfb_noise_x34(const FLOAT * xr, con
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ xr += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ xr34 += 4;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>++#endif
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (remaining) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ x[0] = x[1] = x[2] = x[3] = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ switch( remaining ) {
</span></pre><pre style='margin:0'>
</pre>