#line 3470 "dwf.nw" #ifndef _SSE_H #define _SSE_H #line 3487 "dwf.nw" typedef REAL vReal __attribute__((mode(V4SF),aligned(16))); #line 3494 "dwf.nw" typedef struct { REAL re, im; } complex; typedef struct { vReal re, im; } vcomplex; #line 3508 "dwf.nw" typedef struct SU3 { complex v[3][3]; } SU3; typedef struct { vcomplex v[3][3]; } vSU3; #line 3522 "dwf.nw" typedef struct { vcomplex f[4][3]; } vFermion; typedef struct { vcomplex f[2][3]; } vHalfFermion; #line 3536 "dwf.nw" typedef struct { vFermion f; } vEvenFermion; typedef struct { vFermion f; } vOddFermion; #line 3559 "dwf.nw" static inline vReal vmk1(REAL a) { vReal v = __builtin_ia32_loadss((float *)&a); asm("shufps\t$0,%0,%0" : "+x" (v)); return v; } #line 3568 "dwf.nw" static inline vReal vmk4(REAL a0, REAL a1, REAL a2, REAL a3) { vReal v; REAL *r = (REAL *)&v; r[0] = a0; r[1] = a1; r[2] = a2; r[3] = a3; return v; } #line 3581 "dwf.nw" static inline REAL vsum(vReal v) { REAL *vv = (REAL *)&v; return vv[0] + vv[1] + vv[2] + vv[3]; } #line 3591 "dwf.nw" static inline void vput_3(vReal *v, REAL a3) { ((REAL *)v)[3] = a3; } static inline void vput_0(vReal *v, REAL a0) { ((REAL *)v)[0] = a0; } #line 3613 "dwf.nw" static inline vReal shift_up1(vReal a, vReal b) { vReal x = a; vReal y = b; asm("shufps\t$0x30,%0,%1\n\t" "shufps\t$0x29,%1,%0" : "+x" (x), "+x" (y)); return x; } #line 3627 "dwf.nw" static inline vReal shift_up2(vReal a, vReal b) { vReal x = a; asm("shufps\t$0x4e,%1,%0" : "+x" (x): "x" (b)); return x; } #line 3639 "dwf.nw" static inline vReal shift_up3(vReal a, vReal b) { vReal x = a; asm("shufps\t$0x03,%1,%0\n\t" "shufps\t$0x9c,%1,%0" : "+x" (x): "x" (b)); return x; } #line 3652 "dwf.nw" static inline vReal shift_down1(vReal a, vReal b) { return shift_up3(a, b); } #line 3661 "dwf.nw" static inline vReal shift_down2(vReal a, vReal b) { return shift_up2(a, b); } #line 3670 "dwf.nw" static inline vReal shift_down3(vReal a, vReal b) { return shift_up1(a, b); } #line 3678 "dwf.nw" static inline void vhfzero(vHalfFermion *v) { vReal z = vmk1(0.0); v->f[0][0].re = v->f[0][0].im = v->f[0][1].re = v->f[0][1].im = v->f[0][2].re = v->f[0][2].im = v->f[1][0].re = v->f[1][0].im = v->f[1][1].re = v->f[1][1].im = v->f[1][2].re = v->f[1][2].im = z; } #line 3474 "dwf.nw" #endif