#line 3719 "dwf.nw" #include #include "sse-dwf-cg.h" #line 1650 "dwf.nw" #include #line 3399 "dwf.nw" #include #line 3464 "dwf.nw" #define Vs 4 /* Length of SSE vector */ #define REAL float /* floating point type compatible with vReal */ #include #line 838 "dwf.nw" #define PAD16(size) (15+(size)) #define ALIGN16(addr) ((void *)(~15 & (15 + (size_t)(addr)))) #line 3711 "dwf.nw" #define Nc 3 /* Number of colors */ #define DIM 4 /* number of dimensions */ #define Fd 4 /* Fermion representation dimension */ #line 649 "dwf.nw" struct SSE_DWF_Fermion { vEvenFermion *even; vOddFermion *odd; }; #line 762 "dwf.nw" struct SSE_DWF_Gauge { complex v[Nc][Nc]; }; #line 875 "dwf.nw" struct memblock { struct memblock *next; struct memblock *prev; void *data; size_t size; }; #line 1203 "dwf.nw" struct bounds { int lo[DIM]; int hi[DIM]; }; #line 1301 "dwf.nw" struct neighbor { int size; /* size of site table */ int inside_size; /* number of inside sites */ int boundary_size; /* number of boundary sites */ int snd_size[2*DIM]; /* size of send buffers in 8 dirs */ int rcv_size[2*DIM]; /* size of receive buffers */ int *snd[2*DIM]; /* i->x translation for send buffers */ int *inside; /* i->x translation for inside sites */ struct boundary *boundary; /* i->x,mask translation for boundary */ struct site *site; /* x->site translation for sites */ vHalfFermion *snd_buf[2*DIM]; /* Send buffers */ vHalfFermion *rcv_buf[2*DIM]; /* Receive buffers */ int qmp_size[4*DIM]; /* sizes of QMP buffers */ void *qmp_xbuf[4*DIM]; /* QMP snd/rcv buffer addresses */ vHalfFermion *qmp_buf[4*DIM]; /* send and receive buffers for QMP */ QMP_msgmem_t qmp_mm[4*DIM]; /* msgmem's for send and receive */ int Nx; /* number of msegs */ QMP_msghandle_t qmp_sh[2*DIM]; /* handles for sends */ QMP_msghandle_t qmp_sv[2*DIM]; /* copies of handles for finilization */ int qmp_smask; /* send flags for qmp_sh[] */ int Ns; /* number of send handles */ QMP_msghandle_t qmp_rh[2*DIM]; /* handles for receives */ int Nr; /* number 
of receive handles */ QMP_msghandle_t qmp_cr; /* common receive handle */ }; #line 1333 "dwf.nw" struct boundary { int index; /* x-index of this boundary site */ int mask; /* bitmask of the borders */ }; #line 1342 "dwf.nw" struct site { int Uup; /* up-links are Uup, Uup+1, Uup+2, Uup+3 */ int Udown[DIM]; /* four down-links */ int F[2*DIM]; /* eight neighboring fermions on the other sublattice */ }; #line 556 "dwf.nw" static int inited_p = 0; #line 601 "dwf.nw" static void *(*tmalloc)(size_t size); static void (*tfree)(void *ptr); #line 628 "dwf.nw" static int tlattice[DIM+1]; #line 808 "dwf.nw" static SU3 *U; #line 866 "dwf.nw" static struct memblock memblock = { &memblock, &memblock, NULL, 0 }; #line 951 "dwf.nw" static int network[DIM]; static int coord[DIM]; #line 1183 "dwf.nw" static struct bounds bounds; static int gauge_XYZT; static int S_4, S_4_1; #line 1213 "dwf.nw" static struct neighbor neighbor; static struct neighbor odd_even; static struct neighbor even_odd; #line 1851 "dwf.nw" static vOddFermion *auxA_o, *auxB_o, *Phi_o; static vEvenFermion *auxA_e; #line 1924 "dwf.nw" static vOddFermion *r_o, *p_o, *q_o; #line 1945 "dwf.nw" vEvenFermion *auxB_e; #line 3370 "dwf.nw" static REAL c0; static vReal va0; static vReal va1; static vReal va2; static vReal va3; static vReal va4; static vReal ab_LA; static vReal ab_LB; #line 3447 "dwf.nw" static struct neighbor *sending = 0; #line 885 "dwf.nw" static vEvenFermion *allocate_even_fermion(void); static vOddFermion *allocate_odd_fermion(void); static SSE_DWF_Gauge *allocate_gauge_field(void); /* vFermion *allocate_subfermion(int size); */ #line 1091 "dwf.nw" static inline vReal import_vector(const void *z, void *env, SSE_DWF_fermion_reader reader, int x[DIM+1], int c, int d, int re_im) { vReal f; REAL *v = (REAL *)&f; int i, xs; for (xs = x[DIM], i = 0; i < Vs; i++, x[DIM]++) { *v++ = reader(z, env, x, c, d, re_im); } x[DIM] = xs; return f; } #line 1140 "dwf.nw" static inline void save_vector(void *z, void *env, 
SSE_DWF_fermion_writer writer, int x[DIM+1], int c, int d, int re_im, vReal f) { REAL *v = (REAL *)&f; int i, xs; for (xs = x[DIM], i = 0; i < Vs; i++, x[DIM]++) { writer(z, env, x, c, d, re_im, *v++); } x[DIM] = xs; } #line 1219 "dwf.nw" static inline int lattice_start(int lat, int net, int coord) { int q = lat / net; int r = lat % net; return coord * q + ((coord < r)? coord: r); } static inline void mk_sublattice(struct bounds *bounds, int coord[]) { int i; for (i = 0; i < DIM; i++) { bounds->lo[i] = lattice_start(tlattice[i], network[i], coord[i]); bounds->hi[i] = lattice_start(tlattice[i], network[i], coord[i] + 1); } } #line 1243 "dwf.nw" static void init_neighbor(struct bounds *bounds, struct neighbor *neighbor); #line 1366 "dwf.nw" static void build_neighbor(struct neighbor *out, struct bounds *bounds, int parity, struct neighbor *in); #line 1467 "dwf.nw" static void construct_rec(struct neighbor *out, int par, struct bounds *bounds, int dir, int step); #line 1582 "dwf.nw" static int to_HFlinear(int p[], struct bounds *b, int q, int z) { int x, d; for (x = 0, d = 4; d--;) { int v = p[d] + ((d == q)?z:0); int s = b->hi[d] - b->lo[d]; if (v < 0) v += tlattice[d]; if (v >= tlattice[d]) v -= tlattice[d]; x = x * s + v - b->lo[d]; } return x / 2; } #line 1605 "dwf.nw" static int to_Ulinear(int p[], struct bounds *b, int q) { int x, d; if ((q < 0) || (p[q] > b->lo[q]) || (network[q] < 2)) { #line 1621 "dwf.nw" for (x = 0, d = 4; d--;) { int s = b->hi[d] - b->lo[d]; int v = p[d] - ((q == d)?1:0); if (v < 0) v += tlattice[d]; x = x * s + v - b->lo[d]; } return 4 * x + ((q < 0)?0:q); #line 1614 "dwf.nw" } else { #line 1633 "dwf.nw" int s0, v0; for (d = 0, v0 = 1; d < 4; d++) v0 *= b->hi[d] - b->lo[d]; for (d = 0, s0 = 4 * v0; d < q; d++) s0 += v0 / (b->hi[d] - b->lo[d]); for (d = 4, x = 0; d--;) { int s = b->hi[d] - b->lo[d]; int v = p[d]; if (d == q) continue; x = x * s + v - b->lo[d]; } return s0 + x; #line 1616 "dwf.nw" } } #line 1661 "dwf.nw" static int 
build_buffers(struct neighbor *nb); #line 1722 "dwf.nw" static int make_buffer(struct neighbor *nb, int size); #line 1742 "dwf.nw" static void make_send(struct neighbor *nb, int k, int i, int d); #line 1759 "dwf.nw" static int make_receive(struct neighbor *nb, int k, int i, int d, QMP_msghandle_t Rh[2*DIM], int Nr); #line 1780 "dwf.nw" static void sse_aligned_buffer(struct neighbor *nb, int k, int size); #line 1802 "dwf.nw" static void free_buffers(struct neighbor *nb); #line 1875 "dwf.nw" static int cg(vOddFermion *psi, const vOddFermion *b, const vOddFermion *x0, double epsilon, int max_iter, double *out_eps, int *out_iter); #line 1953 "dwf.nw" static void copy_o(vOddFermion *dst, const vOddFermion *src); #line 1972 "dwf.nw" static void compute_sum2_o(vOddFermion *dst, double alpha, const vOddFermion *src); #line 1990 "dwf.nw" static void compute_sum2x_o(vOddFermion *dst, const vOddFermion *src, double alpha); #line 2009 "dwf.nw" static void compute_sum_e(vEvenFermion *d, const vEvenFermion *x, double alpha, const vEvenFermion *y); static void compute_sum_o(vOddFermion *d, const vOddFermion *x, double alpha, const vOddFermion *y); #line 2050 "dwf.nw" static void compute_sum_oN(vOddFermion *d, double *norm, const vOddFermion *x, double alpha, const vOddFermion *y); #line 2075 "dwf.nw" static void compute_sum2_oN(vOddFermion *d, double *norm, double alpha, const vOddFermion *y); #line 2101 "dwf.nw" static void compute_MxM(vOddFermion *eta, double *norm, const vOddFermion *psi); static void compute_M(vOddFermion *eta, double *norm, const vOddFermion *psi); static void compute_Mx(vOddFermion *eta, const vOddFermion *psi); #line 2142 "dwf.nw" static void compute_Qxx1(vFermion *eta, const vFermion *psi, int xyzt); static void inline compute_Qee1(vEvenFermion *eta, const vEvenFermion *psi) { compute_Qxx1(&eta->f, &psi->f, even_odd.size); } static void inline compute_Qoo1(vOddFermion *eta, const vOddFermion *psi) { compute_Qxx1(&eta->f, &psi->f, odd_even.size); } static 
void compute_Soo1(vOddFermion *eta, const vOddFermion *psi); #line 2614 "dwf.nw" static void compute_Qxy(vFermion *d, const vFermion *s, struct neighbor *nb); static void inline compute_Qoe(vOddFermion *d, const vEvenFermion *s) { compute_Qxy(&d->f, &s->f, &odd_even); } static void inline compute_Qeo(vEvenFermion *d, const vOddFermion *s) { compute_Qxy(&d->f, &s->f, &even_odd); } static void compute_1Sxy(vFermion *d, const vFermion *q, const vFermion *s, struct neighbor *nb); static void inline compute_1Soe(vOddFermion *d, const vOddFermion *q, const vEvenFermion *s) { compute_1Sxy(&d->f, &q->f, &s->f, &odd_even); } #line 3299 "dwf.nw" static void compute_Qxx1Qxy(vFermion *d, const vFermion *s, struct neighbor *nb); static void inline compute_Qee1Qeo(vEvenFermion *d, const vOddFermion *s) { compute_Qxx1Qxy(&d->f, &s->f, &even_odd); } static void compute_Sxx1Sxy(vFermion *d, const vFermion *s, struct neighbor *nb); static void inline compute_See1Seo(vEvenFermion *d, const vOddFermion *s) { compute_Sxx1Sxy(&d->f, &s->f, &even_odd); } static void compute_1Qxx1Qxy(vFermion *d, double *norm, const vFermion *q, const vFermion *s, struct neighbor *nb); static void inline compute_1Qoo1Qoe(vOddFermion *d, double *norm, const vOddFermion *q, const vEvenFermion *s) { compute_1Qxx1Qxy(&d->f, norm, &q->f, &s->f, &odd_even); } #line 3697 "dwf.nw" static inline int parity(const int x[DIM]) { int i, v; for (i = v = 0; i < DIM; i++) v += x[i]; return v & 1; } #line 817 "dwf.nw" static void * alloc16(int size) { int xsize = PAD16(size + sizeof (struct memblock)); struct memblock *p = tmalloc(xsize); if (p == 0) return p; p->data = ALIGN16(&p[1]); p->size = size; p->next = memblock.next; p->prev = &memblock; p->next->prev = p; p->prev->next = p; return p->data; } #line 843 "dwf.nw" static void free16(void *ptr) { struct memblock *p; if (ptr == 0) return; for (p = memblock.next; p != &memblock; p = p->next) { if (p->data != ptr) continue; p->next->prev = p->prev; p->prev->next = 
p->next; tfree(p); return; } /* this is BAD: control should reach here! */ } #line 894 "dwf.nw" vEvenFermion * allocate_even_fermion(void) { return alloc16(even_odd.size * S_4 * sizeof (vFermion)); } vOddFermion * allocate_odd_fermion(void) { return alloc16(odd_even.size * S_4 * sizeof (vFermion)); } SSE_DWF_Gauge * allocate_gauge_field(void) { return alloc16(gauge_XYZT * sizeof (SSE_DWF_Gauge)); } #line 1166 "dwf.nw" static int init_tables(void) { struct neighbor tmp; int i, v; init_neighbor(&bounds, &neighbor); #line 1188 "dwf.nw" S_4 = tlattice[DIM] / 4; S_4_1 = S_4 - 1; for (v = 1, i = 0; i < DIM; i++) { v *= bounds.hi[i] - bounds.lo[i]; } gauge_XYZT = DIM * v; for (i = 0; i < DIM; i++) { if (network[i] < 2) continue; gauge_XYZT += v / (bounds.hi[i] - bounds.lo[i]); } #line 1174 "dwf.nw" tmp = neighbor; build_neighbor(&even_odd, &bounds, 0, &tmp); build_neighbor(&odd_even, &bounds, 1, &tmp); return 0; } #line 1247 "dwf.nw" static void init_neighbor(struct bounds *bounds, struct neighbor *neighbor) { int i; mk_sublattice(bounds, coord); neighbor->qmp_smask = 0; #line 1261 "dwf.nw" for (neighbor->size = 1, neighbor->inside_size = 1, i = 0; i < DIM; i++) { int ext = bounds->hi[i] - bounds->lo[i]; neighbor->size *= ext; if (network[i] > 1) neighbor->inside_size *= ext - 2; else neighbor->inside_size *= ext; } neighbor->boundary_size = neighbor->size - neighbor->inside_size; neighbor->site = tmalloc(neighbor->size * sizeof (struct site)); #line 1255 "dwf.nw" #line 1274 "dwf.nw" if (neighbor->inside_size) neighbor->inside = tmalloc(neighbor->inside_size * sizeof (int)); else neighbor->inside = 0; #line 1256 "dwf.nw" #line 1280 "dwf.nw" if (neighbor->boundary_size) neighbor->boundary = tmalloc(neighbor->boundary_size * sizeof (struct boundary)); else neighbor->boundary = 0; #line 1257 "dwf.nw" #line 1286 "dwf.nw" for (i = 0; i < 2 * DIM; i++) { int d = i / 2; if (network[d] > 1) { neighbor->snd_size[i] = neighbor->size / (bounds->hi[d] - bounds->lo[d]); 
neighbor->snd[i] = tmalloc(neighbor->snd_size[i] * sizeof (int)); } else { neighbor->snd_size[i] = 0; neighbor->snd[i] = 0; } } #line 1258 "dwf.nw" } #line 1351 "dwf.nw" static void build_neighbor(struct neighbor *out, struct bounds *bounds, int par, struct neighbor *in) { int i,d, s, p, m; int x[DIM]; #line 1374 "dwf.nw" *out = *in; out->size = 0; out->inside_size = 0; out->boundary_size = 0; for (d = 0; d < DIM; d++) { out->rcv_size[2*d] = out->snd_size[2*d] = 0; out->rcv_size[2*d+1] = out->snd_size[2*d+1] = 0; } #line 1361 "dwf.nw" #line 990 "dwf.nw" for (i = 0; i < DIM; i++) x[i] = bounds->lo[i]; for (i = 0; i < DIM;) { #line 1386 "dwf.nw" s = parity(x); if (s != par) goto next; #line 1401 "dwf.nw" p = to_HFlinear(x, bounds, -1, 0); for (m = 0, d = 0; d < DIM; d++) { if (network[d] > 1) { if (x[d] == bounds->lo[d]) m |= 1 << (2 * d); if (x[d] + 1 == bounds->hi[d]) m |= 1 << (2 * d + 1); } } #line 1390 "dwf.nw" #line 1414 "dwf.nw" if (m) { #line 1430 "dwf.nw" in->boundary->index = p; in->boundary->mask = m; in->boundary++; out->boundary_size++; for (d = 0; d < 2*DIM; d++) { if ((m & (1 << d)) == 0) continue; *in->snd[d]++ = p; out->snd_size[d]++; } #line 1416 "dwf.nw" } else { #line 1423 "dwf.nw" *in->inside++ = p; out->inside_size++; #line 1418 "dwf.nw" } #line 1391 "dwf.nw" #line 1444 "dwf.nw" in->site->Uup = to_Ulinear(x, bounds, -1); for (d = 0; d < DIM; d++) { in->site->Udown[d] = to_Ulinear(x, bounds, d); if ((m & (1 << (2 * d))) == 0) in->site->F[2*d] = S_4 * to_HFlinear(x, bounds, d, -1); if ((m & (1 << (2 * d + 1))) == 0) in->site->F[2*d + 1] = S_4 * to_HFlinear(x, bounds, d, +1); } #line 1392 "dwf.nw" out->size++; in->size++; next: #line 1002 "dwf.nw" for (i = 0; i < DIM; i++) { #line 1025 "dwf.nw" if (++x[i] == bounds->hi[i]) x[i] = bounds->lo[i]; else break; #line 1004 "dwf.nw" } } #line 1362 "dwf.nw" #line 1457 "dwf.nw" for (d = 0; d < DIM; d++) { if (network[d] < 2) continue; construct_rec(out, par, bounds, d, +1); construct_rec(out, par, bounds, 
d, -1); } #line 1363 "dwf.nw" } #line 1474 "dwf.nw" static void construct_rec(struct neighbor *out, int par, struct bounds *bounds, int dir, int step) { struct bounds xb; int xc[DIM], x[DIM]; int s, d, p, k; int dz = dir * 2 + ((step>0)?1:0); #line 1493 "dwf.nw" for (d = 0; d < DIM; d++) { int v = coord[d] + ((d==dir)?step:0); if (v < 0) v += network[d]; if (v >= network[d]) v -= network[d]; xc[d] = v; } mk_sublattice(&xb, xc); #line 1487 "dwf.nw" #line 1506 "dwf.nw" for (d = 0; d < DIM; d++) x[d] = ((d == dir) && (step < 0))? (xb.hi[d] - 1): xb.lo[d]; #line 1488 "dwf.nw" #line 1514 "dwf.nw" /* ZZZ: This needs some cleaning */ k = 0; do { for (d = 0, s = par; d < DIM; d++) s += x[d]; if (!(s & 1)) goto next; #line 1538 "dwf.nw" p = to_HFlinear(x, bounds, dir, -step); #line 1523 "dwf.nw" #line 1541 "dwf.nw" out->site[p].F[dz] = S_4 * k++; #line 1525 "dwf.nw" next: for (d = 0; d < DIM; d++) { if (d == dir) continue; if (++x[d] == xb.hi[d]) x[d] = xb.lo[d]; else break; } } while (d != DIM); out->rcv_size[dz^1] = k; /* XXX is it true? 
*/ #line 1489 "dwf.nw" } #line 1664 "dwf.nw" static int build_buffers(struct neighbor *nb) { int i, k, Nr; QMP_msghandle_t Rh[2*DIM]; Nr = nb->Ns = nb->Nx = 0; for (i = 0; i < DIM; i++) { switch (network[i]) { case 1: break; case 2: #line 1691 "dwf.nw" k = make_buffer(nb, nb->snd_size[2*i] + nb->snd_size[2*i+1]); nb->snd_buf[2*i] = nb->qmp_buf[k]; nb->snd_buf[2*i+1] = nb->snd_buf[2*i] + S_4 * nb->snd_size[2*i]; make_send(nb, k, i, +1); k = make_buffer(nb, nb->rcv_size[2*i] + nb->rcv_size[2*i+1]); nb->rcv_buf[2*i] = nb->qmp_buf[k]; nb->rcv_buf[2*i+1] = nb->snd_buf[2*i] + S_4 * nb->snd_size[2*i]; Nr = make_receive(nb, k, i, -1, Rh, Nr); /* -1 here helps with a bug in GigE QMP */ #line 1676 "dwf.nw" break; default: /* Order here is important */ #line 1712 "dwf.nw" k = make_buffer(nb, nb->snd_size[2*i]); nb->snd_buf[2*i] = nb->qmp_buf[k]; make_send(nb, k, i, -1); k = make_buffer(nb, nb->rcv_size[2*i]); nb->rcv_buf[2*i] = nb->qmp_buf[k]; Nr = make_receive(nb, k, i, -1, Rh, Nr); #line 1680 "dwf.nw" #line 1703 "dwf.nw" k = make_buffer(nb, nb->snd_size[2*i+1]); nb->snd_buf[2*i+1] = nb->qmp_buf[k]; make_send(nb, k, i, +1); k = make_buffer(nb, nb->rcv_size[2*i+1]); nb->rcv_buf[2*i+1] = nb->qmp_buf[k]; Nr = make_receive(nb, k, i, +1, Rh, Nr); #line 1681 "dwf.nw" break; } } #line 1773 "dwf.nw" nb->qmp_cr = QMP_declare_multiple(Rh, Nr); #line 1685 "dwf.nw" return 0; } #line 1725 "dwf.nw" static int make_buffer(struct neighbor *nb, int size) { int bcount = size * S_4 * sizeof (vHalfFermion); int N = nb->Nx; nb->qmp_size[N] = size; sse_aligned_buffer(nb, N, bcount); nb->qmp_mm[N] = QMP_declare_msgmem(nb->qmp_buf[N], bcount); nb->Nx = N + 1; return N; } #line 1745 "dwf.nw" static void make_send(struct neighbor *nb, int k, int i, int d) { QMP_msghandle_t h = QMP_declare_send_relative(nb->qmp_mm[k], i, d, 1); int j = 2 * i + ((d < 0)? 
0: 1); nb->qmp_sh[j] = h; nb->qmp_sv[nb->Ns++] = h; nb->qmp_smask |= (1 << j); } #line 1763 "dwf.nw" static int make_receive(struct neighbor *nb, int k, int i, int d, QMP_msghandle_t Rh[2*DIM], int Nr) { Rh[Nr] = QMP_declare_receive_relative(nb->qmp_mm[k], i, d, 1); return Nr+1; } #line 1783 "dwf.nw" static void sse_aligned_buffer(struct neighbor *nb, int k, int size) { int xcount = size + 15; char *ptr = QMP_allocate_aligned_memory(xcount); nb->qmp_buf[k] = (void *)(~15 & (15 + (unsigned long)(ptr))); nb->qmp_xbuf[k] = ptr; } #line 1805 "dwf.nw" static void free_buffers(struct neighbor *nb) { int i; #line 1821 "dwf.nw" QMP_free_msghandle(nb->qmp_cr); #line 1811 "dwf.nw" #line 1826 "dwf.nw" for (i = nb->Ns; i--;) QMP_free_msghandle(nb->qmp_sv[i]); #line 1812 "dwf.nw" #line 1831 "dwf.nw" for (i = nb->Nx; i--;) { QMP_free_msgmem(nb->qmp_mm[i]); QMP_free_aligned_memory(nb->qmp_xbuf[i]); } #line 1813 "dwf.nw" } #line 1882 "dwf.nw" static int cg(vOddFermion *x_o, const vOddFermion *b, const vOddFermion *x0, double epsilon, int N, double *out_eps, int *out_N) { double rho, alpha, beta, gamma, norm_z; int status = 1; int k; copy_o(x_o, x0); compute_MxM(p_o, &norm_z, x_o); compute_sum_oN(r_o, &rho, b, -1, p_o); copy_o(p_o, r_o); #line 3408 "dwf.nw" /* relax, QMP does not support split reductions yet. */ #line 1899 "dwf.nw" for (k = 0; (rho > epsilon) && (k < N); k++) { compute_MxM(q_o, &norm_z, p_o); #line 3408 "dwf.nw" /* relax, QMP does not support split reductions yet. */ #line 1902 "dwf.nw" alpha = rho / norm_z; compute_sum2_oN(r_o, &gamma, -alpha, q_o); compute_sum2_o(x_o, alpha, p_o); #line 3408 "dwf.nw" /* relax, QMP does not support split reductions yet. 
*/ #line 1906 "dwf.nw" if (gamma < epsilon) { rho = gamma; status = 0; break; } beta = gamma / rho; rho = gamma; compute_sum2x_o(p_o, r_o, beta); } #line 3435 "dwf.nw" if (sending) { int i; /* This is QMP_wait_vector(nb->qmp_sv, nb->Ns); */ for (i = sending->Ns; i--;) QMP_wait(sending->qmp_sv[i]); sending = 0; } #line 1916 "dwf.nw" *out_N = k; *out_eps = rho; return status; } #line 1956 "dwf.nw" static void copy_o(vOddFermion *dst, const vOddFermion *src) { int i = odd_even.size * S_4 * sizeof (vOddFermion) / sizeof (vReal); vReal *d = (vReal *)dst; const vReal *s = (const vReal *)src; for ( ;i--;) *d++ = *s++; } #line 1975 "dwf.nw" static void compute_sum2_o(vOddFermion *dst, double alpha, const vOddFermion *src) { int i = odd_even.size * S_4 * sizeof (vOddFermion) / sizeof (vReal); vReal a = vmk1(alpha); vReal *d = (vReal *)dst; const vReal *s = (const vReal *)src; for ( ;i--;) *d++ += a * *s++; } #line 1993 "dwf.nw" static void compute_sum2x_o(vOddFermion *dst, const vOddFermion *src, double alpha) { int i = odd_even.size * S_4 * sizeof (vOddFermion) / sizeof (vReal); vReal a = vmk1(alpha); vReal *d = (vReal *)dst; const vReal *s = (const vReal *)src; for ( ;i--; d++) *d += a * *d + *s++; } #line 2015 "dwf.nw" static void compute_sum_e(vEvenFermion *d, const vEvenFermion *x, double alpha, const vEvenFermion *y) { const vReal *X = (const vReal *)x; const vReal *Y = (const vReal *)y; vReal *D = (vReal *)d; vReal a = vmk1(alpha); int i = even_odd.size * S_4 * sizeof (vEvenFermion) / sizeof (vReal); for (;i--;) *D++ = *X++ + a * *Y++; } #line 2030 "dwf.nw" static void compute_sum_o(vOddFermion *d, const vOddFermion *x, double alpha, const vOddFermion *y) { const vReal *X = (const vReal *)x; const vReal *Y = (const vReal *)y; vReal *D = (vReal *)d; vReal a = vmk1(alpha); int i = odd_even.size * S_4 * sizeof (vOddFermion) / sizeof (vReal); for (;i--;) *D++ = *X++ + a * *Y++; } #line 2053 "dwf.nw" static void compute_sum_oN(vOddFermion *d, double *norm, const 
vOddFermion *x, double alpha, const vOddFermion *y) { const vReal *X = (const vReal *)x; const vReal *Y = (const vReal *)y; vReal *D = (vReal *)d; vReal a = vmk1(alpha); vReal s = vmk1(0.0); vReal v; int i = odd_even.size * S_4 * sizeof (vOddFermion) / sizeof (vReal); for (;i--;) { v = *X++ + a * *Y++; s += v * v; *D++ = v; } *norm = vsum(s); #line 3457 "dwf.nw" QMP_sum_double(norm); #line 2072 "dwf.nw" } #line 2078 "dwf.nw" static void compute_sum2_oN(vOddFermion *d, double *norm, double alpha, const vOddFermion *y) { const vReal *Y = (const vReal *)y; vReal *D = (vReal *)d; vReal a = vmk1(alpha); vReal s = vmk1(0.0); vReal v; int i = odd_even.size * S_4 * sizeof (vOddFermion) / sizeof (vReal); for (;i--;) { v = *D + a * *Y++; s += v * v; *D++ = v; } *norm = vsum(s); #line 3457 "dwf.nw" QMP_sum_double(norm); #line 2096 "dwf.nw" } #line 2109 "dwf.nw" static void compute_MxM(vOddFermion *eta, double *norm, const vOddFermion *psi) { compute_M(auxB_o, norm, psi); compute_Mx(eta, auxB_o); } #line 2120 "dwf.nw" static void compute_M(vOddFermion *eta, double *norm, const vOddFermion *psi) { compute_Qee1Qeo(auxA_e, psi); compute_1Qoo1Qoe(eta, norm, psi, auxA_e); } #line 2130 "dwf.nw" static void compute_Mx(vOddFermion *eta, const vOddFermion *psi) { compute_Soo1(auxA_o, psi); compute_See1Seo(auxA_e, auxA_o); compute_1Soe(eta, psi, auxA_e); } #line 2157 "dwf.nw" static void compute_Qxx1(vFermion *chi, const vFermion *psi, int size) { const vFermion *qs, *qx5; #line 3332 "dwf.nw" int i, xyzt5, s, c; vFermion * __restrict__ rx5, * __restrict__ rs; #line 2162 "dwf.nw" #line 3359 "dwf.nw" vReal fx; vHalfFermion zV; vcomplex zn, z1, z2, z3; complex zX[2][3]; vHalfFermion xOut; vHalfFermion yOut; #line 2164 "dwf.nw" for (i = 0; i < size; i++) { xyzt5 = i * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 2167 "dwf.nw" #line 3355 "dwf.nw" qx5 = &psi[xyzt5]; #line 2168 "dwf.nw" #line 2269 "dwf.nw" vhfzero(&zV); fx = ab_LA; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) 
#define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2272 "dwf.nw" for (s = 0; s < S_4_1; s++, fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2276 "dwf.nw" } rs = &rx5[S_4_1]; QSETUP(S_4_1) vput_3(&fx, c0); #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2281 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2284 "dwf.nw" zn.re = qs->f[0][c].re; zn.im = qs->f[0][c].im; vput_3(&zn.re, zX[0][c].re); vput_3(&zn.im, zX[0][c].im); rs->f[0][c].re = zn.re; rs->f[0][c].im = zn.im; zn.re = qs->f[1][c].re; zn.im = qs->f[1][c].im; vput_3(&zn.re, zX[1][c].re); vput_3(&zn.im, zX[1][c].im); rs->f[1][c].re = zn.re; rs->f[1][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2417 "dwf.nw" for (s = S_4; s--;) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2463 "dwf.nw" zn.re = rs->f[0][c].re; zn.im = rs->f[0][c].im; z1.re = shift_down1(zn.re, xOut.f[0][c].re); z1.im = shift_down1(zn.im, xOut.f[0][c].im); z2.re = shift_down2(zn.re, xOut.f[0][c].re); z2.im = shift_down2(zn.im, xOut.f[0][c].im); z3.re = shift_down3(zn.re, xOut.f[0][c].re); z3.im = shift_down3(zn.im, xOut.f[0][c].im); rs->f[0][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[0][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = 
rs->f[0][c].re; yOut.f[0][c].im = rs->f[0][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2421 "dwf.nw" #line 2480 "dwf.nw" zn.re = rs->f[1][c].re; zn.im = rs->f[1][c].im; z1.re = shift_down1(zn.re, xOut.f[1][c].re); z1.im = shift_down1(zn.im, xOut.f[1][c].im); z2.re = shift_down2(zn.re, xOut.f[1][c].re); z2.im = shift_down2(zn.im, xOut.f[1][c].im); z3.re = shift_down3(zn.re, xOut.f[1][c].re); z3.im = shift_down3(zn.im, xOut.f[1][c].im); rs->f[1][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[1][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[1][c].re; yOut.f[1][c].im = rs->f[1][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2422 "dwf.nw" } } #line 2377 "dwf.nw" vhfzero(&zV); fx = ab_LB; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2380 "dwf.nw" for (s = S_4; --s; fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2384 "dwf.nw" } rs = &rx5[0]; QSETUP(0) vput_0(&fx, c0); #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2389 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2391 "dwf.nw" zn.re = qs->f[2][c].re; zn.im = qs->f[2][c].im; vput_0(&zn.re, zX[0][c].re); vput_0(&zn.im, zX[0][c].im); rs->f[2][c].re = zn.re; rs->f[2][c].im = zn.im; zn.re = 
qs->f[3][c].re; zn.im = qs->f[3][c].im; vput_0(&zn.re, zX[1][c].re); vput_0(&zn.im, zX[1][c].im); rs->f[3][c].re = zn.re; rs->f[3][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2447 "dwf.nw" for (s = 0; s < S_4; s++) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2565 "dwf.nw" zn.re = rs->f[2][c].re; zn.im = rs->f[2][c].im; z1.re = shift_up1(xOut.f[0][c].re, zn.re); z1.im = shift_up1(xOut.f[0][c].im, zn.im); z2.re = shift_up2(xOut.f[0][c].re, zn.re); z2.im = shift_up2(xOut.f[0][c].im, zn.im); z3.re = shift_up3(xOut.f[0][c].re, zn.re); z3.im = shift_up3(xOut.f[0][c].im, zn.im); rs->f[2][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[2][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[2][c].re; yOut.f[0][c].im = rs->f[2][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2451 "dwf.nw" #line 2582 "dwf.nw" zn.re = rs->f[3][c].re; zn.im = rs->f[3][c].im; z1.re = shift_up1(xOut.f[1][c].re, zn.re); z1.im = shift_up1(xOut.f[1][c].im, zn.im); z2.re = shift_up2(xOut.f[1][c].re, zn.re); z2.im = shift_up2(xOut.f[1][c].im, zn.im); z3.re = shift_up3(xOut.f[1][c].re, zn.re); z3.im = shift_up3(xOut.f[1][c].im, zn.im); rs->f[3][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[3][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[3][c].re; yOut.f[1][c].im = rs->f[3][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2452 "dwf.nw" } } #line 2169 "dwf.nw" } } #line 2176 "dwf.nw" static void compute_Soo1(vOddFermion *Chi, const vOddFermion *Psi) { vFermion *chi = &Chi->f; const vFermion *psi = &Psi->f; int size = odd_even.size; const vFermion *qs, *qx5; #line 3332 "dwf.nw" int i, xyzt5, s, c; vFermion * __restrict__ rx5, * __restrict__ rs; #line 2184 "dwf.nw" #line 3359 "dwf.nw" vReal fx; vHalfFermion zV; vcomplex zn, 
z1, z2, z3; complex zX[2][3]; vHalfFermion xOut; vHalfFermion yOut; #line 2186 "dwf.nw" for (i = 0; i < size; i++) { xyzt5 = i * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 2189 "dwf.nw" #line 3355 "dwf.nw" qx5 = &psi[xyzt5]; #line 2190 "dwf.nw" #line 2311 "dwf.nw" vhfzero(&zV); fx = ab_LA; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2314 "dwf.nw" for (s = 0; s < S_4_1; s++, fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2318 "dwf.nw" } rs = &rx5[S_4_1]; QSETUP(S_4_1) vput_3(&fx, c0); #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2323 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2325 "dwf.nw" zn.re = qs->f[2][c].re; zn.im = qs->f[2][c].im; vput_3(&zn.re, zX[0][c].re); vput_3(&zn.im, zX[0][c].im); rs->f[2][c].re = zn.re; rs->f[2][c].im = zn.im; zn.re = qs->f[3][c].re; zn.im = qs->f[3][c].im; vput_3(&zn.re, zX[1][c].re); vput_3(&zn.im, zX[1][c].im); rs->f[3][c].re = zn.re; rs->f[3][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2427 "dwf.nw" for (s = S_4; s--;) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2497 "dwf.nw" zn.re = rs->f[2][c].re; zn.im = rs->f[2][c].im; z1.re = shift_down1(zn.re, xOut.f[0][c].re); z1.im = shift_down1(zn.im, xOut.f[0][c].im); z2.re = 
shift_down2(zn.re, xOut.f[0][c].re); z2.im = shift_down2(zn.im, xOut.f[0][c].im); z3.re = shift_down3(zn.re, xOut.f[0][c].re); z3.im = shift_down3(zn.im, xOut.f[0][c].im); rs->f[2][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[2][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[2][c].re; yOut.f[0][c].im = rs->f[2][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2431 "dwf.nw" #line 2514 "dwf.nw" zn.re = rs->f[3][c].re; zn.im = rs->f[3][c].im; z1.re = shift_down1(zn.re, xOut.f[1][c].re); z1.im = shift_down1(zn.im, xOut.f[1][c].im); z2.re = shift_down2(zn.re, xOut.f[1][c].re); z2.im = shift_down2(zn.im, xOut.f[1][c].im); z3.re = shift_down3(zn.re, xOut.f[1][c].re); z3.im = shift_down3(zn.im, xOut.f[1][c].im); rs->f[3][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[3][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[3][c].re; yOut.f[1][c].im = rs->f[3][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2432 "dwf.nw" } } #line 2349 "dwf.nw" vhfzero(&zV); fx = ab_LB; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2352 "dwf.nw" for (s = S_4; --s; fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2356 "dwf.nw" } rs = &rx5[0]; QSETUP(0) vput_0(&fx, c0); #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2361 "dwf.nw" for (c = 
0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2364 "dwf.nw" zn.re = qs->f[0][c].re; zn.im = qs->f[0][c].im; vput_0(&zn.re, zX[0][c].re); vput_0(&zn.im, zX[0][c].im); rs->f[0][c].re = zn.re; rs->f[0][c].im = zn.im; zn.re = qs->f[1][c].re; zn.im = qs->f[1][c].im; vput_0(&zn.re, zX[1][c].re); vput_0(&zn.im, zX[1][c].im); rs->f[1][c].re = zn.re; rs->f[1][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2437 "dwf.nw" for (s = 0; s < S_4; s++) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2531 "dwf.nw" zn.re = rs->f[0][c].re; zn.im = rs->f[0][c].im; z1.re = shift_up1(xOut.f[0][c].re, zn.re); z1.im = shift_up1(xOut.f[0][c].im, zn.im); z2.re = shift_up2(xOut.f[0][c].re, zn.re); z2.im = shift_up2(xOut.f[0][c].im, zn.im); z3.re = shift_up3(xOut.f[0][c].re, zn.re); z3.im = shift_up3(xOut.f[0][c].im, zn.im); rs->f[0][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[0][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[0][c].re; yOut.f[0][c].im = rs->f[0][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2441 "dwf.nw" #line 2548 "dwf.nw" zn.re = rs->f[1][c].re; zn.im = rs->f[1][c].im; z1.re = shift_up1(xOut.f[1][c].re, zn.re); z1.im = shift_up1(xOut.f[1][c].im, zn.im); z2.re = shift_up2(xOut.f[1][c].re, zn.re); z2.im = shift_up2(xOut.f[1][c].im, zn.im); z3.re = shift_up3(xOut.f[1][c].re, zn.re); z3.im = shift_up3(xOut.f[1][c].im, zn.im); rs->f[1][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[1][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[1][c].re; yOut.f[1][c].im = rs->f[1][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2442 "dwf.nw" } } #line 2191 "dwf.nw" } } #line 
2642 "dwf.nw"
/*
 * compute_Qxy -- hopping term: for every site listed in nb, combine the
 * eight neighbouring psi values (two per direction) into chi.
 *
 *   chi  destination field; S_4 consecutive vFermion entries per site
 *        (indexed as chi[xyzt * S_4 + s]).
 *   psi  source field, read only.
 *   nb   neighbor tables plus QMP buffers and handles.
 *
 * Steps, in execution order:
 *   1. QMP_start(nb->qmp_cr), then wait for any sends left pending by an
 *      earlier call (tracked in the file-level pointer `sending`).
 *   2. For each direction k = 0..7, pack a two-component combination of the
 *      four spin components of psi into nb->snd_buf[k] and, if bit k of
 *      nb->qmp_smask is set, start the send nb->qmp_sh[k].
 *      (Presumably these are the (1 +/- gamma_mu) spin projections --
 *      TODO confirm against dwf.nw.)
 *   3. Process the inside sites while communication is in flight.
 *   4. QMP_wait(nb->qmp_cr), then process the boundary sites; for a boundary
 *      site, bit d of its mask selects nb->rcv_buf[d] as the source of the
 *      half spinor for direction d instead of a local projection.
 *
 * For every (site, s) the half spinors are multiplied by the link matrices
 * V[d] (result in hh[d]) and hh[0..7] are accumulated into the four spin
 * components of chi.
 *
 * Uses the file-level objects U, S_4 and sending.
 */
static void
compute_Qxy(vFermion *chi,
            const vFermion *psi,
            struct neighbor *nb)
{
#line 3332 "dwf.nw"
    int i, xyzt5, s, c;
    vFermion * __restrict__ rx5, * __restrict__ rs;
#line 2648 "dwf.nw"
#line 3337 "dwf.nw"
    int xyzt, k, d;
    const vFermion *f;
    vHalfFermion *g;
    vHalfFermion gg[8], hh[8];   /* projected / link-multiplied half spinors */
    vSU3 V[8];                   /* per-site links, one per direction */
    /* NOTE(review): p5[] is read below (ps[d] = p5[d] + s) but nothing in
     * this function assigns it.  Presumably it should be loaded from the
     * site table for the current xyzt -- confirm against struct site. */
    int ps[8], p5[8];
#line 3345 "dwf.nw"
    const SU3 *Uup, *Udown;
    int c1, c2;
#line 2650 "dwf.nw"
    /* The accumulation chunks are written in terms of qs/qx5; here they
     * alias the destination pointers. */
#line 2767 "dwf.nw"
#define qx5 rx5
#define qs rs
#line 2651 "dwf.nw"
#line 3413 "dwf.nw"
    QMP_start(nb->qmp_cr);
#line 2652 "dwf.nw"
#line 3435 "dwf.nw"
    if (sending) {
        int i;

        /* This is QMP_wait_vector(nb->qmp_sv, nb->Ns); */
        for (i = sending->Ns; i--;)
            QMP_wait(sending->qmp_sv[i]);
        sending = 0;
    }
#line 2653 "dwf.nw"
    /* Pack the send buffers, one direction at a time, and start each send
     * as soon as its buffer is complete. */
#line 2782 "dwf.nw"
    {
        int k, i, s, c, *src;
        const vFermion *f;
        vHalfFermion *g;

        k = 0;
#line 2815 "dwf.nw"
        for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) {
            for (s = S_4, f = &psi[*src]; s--; g++, f++) {
                for (c = 0; c < 3; c++) {
#line 129 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re - f->f[3][c].im;
                    g->f[0][c].im = f->f[0][c].im + f->f[3][c].re;
                    g->f[1][c].re = f->f[1][c].re - f->f[2][c].im;
                    g->f[1][c].im = f->f[1][c].im + f->f[2][c].re;
#line 2819 "dwf.nw"
                }
            }
        }
#line 3425 "dwf.nw"
        if (nb->qmp_smask & (1 << k)) {
            QMP_start(nb->qmp_sh[k]);
            sending = nb;
        }
#line 2788 "dwf.nw"
        k = 1;
#line 2825 "dwf.nw"
        for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) {
            for (s = S_4, f = &psi[*src]; s--; g++, f++) {
                for (c = 0; c < 3; c++) {
#line 142 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re + f->f[3][c].im;
                    g->f[0][c].im = f->f[0][c].im - f->f[3][c].re;
                    g->f[1][c].re = f->f[1][c].re + f->f[2][c].im;
                    g->f[1][c].im = f->f[1][c].im - f->f[2][c].re;
#line 2829 "dwf.nw"
                }
            }
        }
#line 3425 "dwf.nw"
        if (nb->qmp_smask & (1 << k)) {
            QMP_start(nb->qmp_sh[k]);
            sending = nb;
        }
#line 2789 "dwf.nw"
        k = 2;
#line 2835 "dwf.nw"
        for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) {
            for (s = S_4, f = &psi[*src]; s--; g++, f++) {
                for (c = 0; c < 3; c++) {
#line 170 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re - f->f[3][c].re;
                    g->f[0][c].im = f->f[0][c].im - f->f[3][c].im;
                    g->f[1][c].re = f->f[1][c].re + f->f[2][c].re;
                    g->f[1][c].im = f->f[1][c].im + f->f[2][c].im;
#line 2839 "dwf.nw"
                }
            }
        }
#line 3425 "dwf.nw"
        if (nb->qmp_smask & (1 << k)) {
            QMP_start(nb->qmp_sh[k]);
            sending = nb;
        }
#line 2790 "dwf.nw"
        k = 3;
#line 2845 "dwf.nw"
        for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) {
            for (s = S_4, f = &psi[*src]; s--; g++, f++) {
                for (c = 0; c < 3; c++) {
#line 183 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re + f->f[3][c].re;
                    g->f[0][c].im = f->f[0][c].im + f->f[3][c].im;
                    g->f[1][c].re = f->f[1][c].re - f->f[2][c].re;
                    g->f[1][c].im = f->f[1][c].im - f->f[2][c].im;
#line 2849 "dwf.nw"
                }
            }
        }
#line 3425 "dwf.nw"
        if (nb->qmp_smask & (1 << k)) {
            QMP_start(nb->qmp_sh[k]);
            sending = nb;
        }
#line 2791 "dwf.nw"
        k = 4;
#line 2855 "dwf.nw"
        for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) {
            for (s = S_4, f = &psi[*src]; s--; g++, f++) {
                for (c = 0; c < 3; c++) {
#line 211 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re - f->f[2][c].im;
                    g->f[0][c].im = f->f[0][c].im + f->f[2][c].re;
                    g->f[1][c].re = f->f[1][c].re + f->f[3][c].im;
                    g->f[1][c].im = f->f[1][c].im - f->f[3][c].re;
#line 2859 "dwf.nw"
                }
            }
        }
#line 3425 "dwf.nw"
        if (nb->qmp_smask & (1 << k)) {
            QMP_start(nb->qmp_sh[k]);
            sending = nb;
        }
#line 2792 "dwf.nw"
        k = 5;
#line 2865 "dwf.nw"
        for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) {
            for (s = S_4, f = &psi[*src]; s--; g++, f++) {
                for (c = 0; c < 3; c++) {
#line 224 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re + f->f[2][c].im;
                    g->f[0][c].im = f->f[0][c].im - f->f[2][c].re;
                    g->f[1][c].re = f->f[1][c].re - f->f[3][c].im;
                    g->f[1][c].im = f->f[1][c].im + f->f[3][c].re;
#line 2869 "dwf.nw"
                }
            }
        }
#line 3425 "dwf.nw"
        if (nb->qmp_smask & (1 << k)) {
            QMP_start(nb->qmp_sh[k]);
            sending = nb;
        }
#line 2793 "dwf.nw"
        k = 6;
#line 2875 "dwf.nw"
        for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) {
            for (s = S_4, f = &psi[*src]; s--; g++, f++) {
                for (c = 0; c < 3; c++) {
#line 252 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re + f->f[2][c].re;
                    g->f[0][c].im = f->f[0][c].im + f->f[2][c].im;
                    g->f[1][c].re = f->f[1][c].re + f->f[3][c].re;
                    g->f[1][c].im = f->f[1][c].im + f->f[3][c].im;
#line 2879 "dwf.nw"
                }
            }
        }
#line 3425 "dwf.nw"
        if (nb->qmp_smask & (1 << k)) {
            QMP_start(nb->qmp_sh[k]);
            sending = nb;
        }
#line 2794 "dwf.nw"
        k = 7;
#line 2885 "dwf.nw"
        for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) {
            for (s = S_4, f = &psi[*src]; s--; g++, f++) {
                for (c = 0; c < 3; c++) {
#line 265 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re - f->f[2][c].re;
                    g->f[0][c].im = f->f[0][c].im - f->f[2][c].im;
                    g->f[1][c].re = f->f[1][c].re - f->f[3][c].re;
                    g->f[1][c].im = f->f[1][c].im - f->f[3][c].im;
#line 2889 "dwf.nw"
                }
            }
        }
#line 3425 "dwf.nw"
        if (nb->qmp_smask & (1 << k)) {
            QMP_start(nb->qmp_sh[k]);
            sending = nb;
        }
#line 2795 "dwf.nw"
    }
#line 2654 "dwf.nw"
    /* Inside sites: every neighbour is read directly from psi. */
#line 2897 "dwf.nw"
    for (i = 0; i < nb->inside_size; i++) {
        xyzt = nb->inside[i];
        xyzt5 = xyzt * S_4;
#line 3352 "dwf.nw"
        rx5 = &chi[xyzt5];
#line 2901 "dwf.nw"
        /* Broadcast the links of this site into vector form:
         * V[2d] is the up link, V[2d+1] the conjugate-transposed down link. */
#line 3276 "dwf.nw"
        Uup = &U[nb->site[xyzt].Uup];
        for (d = 0; d < 4; d++, Uup++) {
            Udown = &U[nb->site[xyzt].Udown[d]];
            for (c1 = 0; c1 < 3; c1++) {
                for (c2 = 0; c2 < 3; c2++) {
                    V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re);
                    V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im);
                    /* conjugate down-link */
                    V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re);
                    V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im);
                }
            }
        }
#line 2902 "dwf.nw"
#line 2919 "dwf.nw"
        for (s = 0; s < S_4; s++) {
#line 3292 "dwf.nw"
            for (d = 0; d < 8; d++) {
                ps[d] = p5[d] + s;
            }
            /* Project the eight neighbours of (xyzt, s) into gg[0..7]. */
#line 2936 "dwf.nw"
            for (c = 0; c < 3; c++) {
                k=0; f=&psi[ps[0]]; g=&gg[0];
#line 129 "dwf.nw"
                g->f[0][c].re = f->f[0][c].re - f->f[3][c].im;
                g->f[0][c].im = f->f[0][c].im + f->f[3][c].re;
                g->f[1][c].re = f->f[1][c].re - f->f[2][c].im;
                g->f[1][c].im = f->f[1][c].im + f->f[2][c].re;
#line 2938 "dwf.nw"
                k=1; f=&psi[ps[1]]; g=&gg[1];
#line 142 "dwf.nw"
                g->f[0][c].re = f->f[0][c].re + f->f[3][c].im;
                g->f[0][c].im = f->f[0][c].im - f->f[3][c].re;
                g->f[1][c].re = f->f[1][c].re + f->f[2][c].im;
                g->f[1][c].im = f->f[1][c].im - f->f[2][c].re;
#line 2939 "dwf.nw"
                k=2; f=&psi[ps[2]]; g=&gg[2];
#line 170 "dwf.nw"
                g->f[0][c].re = f->f[0][c].re - f->f[3][c].re;
                g->f[0][c].im = f->f[0][c].im - f->f[3][c].im;
                g->f[1][c].re = f->f[1][c].re + f->f[2][c].re;
                g->f[1][c].im = f->f[1][c].im + f->f[2][c].im;
#line 2940 "dwf.nw"
                k=3; f=&psi[ps[3]]; g=&gg[3];
#line 183 "dwf.nw"
                g->f[0][c].re = f->f[0][c].re + f->f[3][c].re;
                g->f[0][c].im = f->f[0][c].im + f->f[3][c].im;
                g->f[1][c].re = f->f[1][c].re - f->f[2][c].re;
                g->f[1][c].im = f->f[1][c].im - f->f[2][c].im;
#line 2941 "dwf.nw"
                k=4; f=&psi[ps[4]]; g=&gg[4];
#line 211 "dwf.nw"
                g->f[0][c].re = f->f[0][c].re - f->f[2][c].im;
                g->f[0][c].im = f->f[0][c].im + f->f[2][c].re;
                g->f[1][c].re = f->f[1][c].re + f->f[3][c].im;
                g->f[1][c].im = f->f[1][c].im - f->f[3][c].re;
#line 2942 "dwf.nw"
                k=5; f=&psi[ps[5]]; g=&gg[5];
#line 224 "dwf.nw"
                g->f[0][c].re = f->f[0][c].re + f->f[2][c].im;
                g->f[0][c].im = f->f[0][c].im - f->f[2][c].re;
                g->f[1][c].re = f->f[1][c].re - f->f[3][c].im;
                g->f[1][c].im = f->f[1][c].im + f->f[3][c].re;
#line 2943 "dwf.nw"
                k=6; f=&psi[ps[6]]; g=&gg[6];
#line 252 "dwf.nw"
                g->f[0][c].re = f->f[0][c].re + f->f[2][c].re;
                g->f[0][c].im = f->f[0][c].im + f->f[2][c].im;
                g->f[1][c].re = f->f[1][c].re + f->f[3][c].re;
                g->f[1][c].im = f->f[1][c].im + f->f[3][c].im;
#line 2944 "dwf.nw"
                k=7; f=&psi[ps[7]]; g=&gg[7];
#line 265 "dwf.nw"
                g->f[0][c].re = f->f[0][c].re - f->f[2][c].re;
                g->f[0][c].im = f->f[0][c].im - f->f[2][c].im;
                g->f[1][c].re = f->f[1][c].re - f->f[3][c].re;
                g->f[1][c].im = f->f[1][c].im - f->f[3][c].im;
#line 2945 "dwf.nw"
            }
#line 2921 "dwf.nw"
            /* hh[d] = V[d] * gg[d]: complex 3x3 matrix times each of the
             * two colour vectors of the half spinor. */
#line 2992 "dwf.nw"
            for (d = 0; d < 8; d++) {
                vHalfFermion * __restrict__ h = &hh[d];
                vSU3 *u = &V[d];
                g = &gg[d];
#line 3009 "dwf.nw"
                for (c = 0; c < 3; c++) {
                    h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im
                                 +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im
                                 +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im;
                    h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im
                                 +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im
                                 +u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im;
                    h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im
                                 +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im
                                 +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im;
                    h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im
                                 +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im
                                 +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im;
                }
#line 2997 "dwf.nw"
            }
#line 2922 "dwf.nw"
            /* Accumulate the link-multiplied half spinors hh[] into the four
             * spin components of chi.  k = 6 goes first because it assigns
             * (=) every component; the rest add to it. */
#line 2978 "dwf.nw"
            rs = &rx5[s];
            for (c = 0; c < 3; c++) {
                k = 6;
#line 258 "dwf.nw"
                qs->f[0][c].re = hh[k].f[0][c].re; qs->f[2][c].re = hh[k].f[0][c].re;
                qs->f[0][c].im = hh[k].f[0][c].im; qs->f[2][c].im = hh[k].f[0][c].im;
                qs->f[1][c].re = hh[k].f[1][c].re; qs->f[3][c].re = hh[k].f[1][c].re;
                qs->f[1][c].im = hh[k].f[1][c].im; qs->f[3][c].im = hh[k].f[1][c].im;
#line 2981 "dwf.nw"
                k = 7;
#line 271 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[2][c].re -= hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[2][c].im -= hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[3][c].re -= hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[3][c].im -= hh[k].f[1][c].im;
#line 2982 "dwf.nw"
                k = 2;
#line 176 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[3][c].re -= hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[3][c].im -= hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[2][c].re += hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[2][c].im += hh[k].f[1][c].im;
#line 2983 "dwf.nw"
                k = 3;
#line 189 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[3][c].re += hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[3][c].im += hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[2][c].re -= hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[2][c].im -= hh[k].f[1][c].im;
#line 2984 "dwf.nw"
                k = 0;
#line 135 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[3][c].im -= hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[3][c].re += hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[2][c].im -= hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[2][c].re += hh[k].f[1][c].im;
#line 2985 "dwf.nw"
                k = 1;
#line 148 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[3][c].im += hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[3][c].re -= hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[2][c].im += hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[2][c].re -= hh[k].f[1][c].im;
#line 2986 "dwf.nw"
                k = 4;
#line 217 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[2][c].im -= hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[2][c].re += hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[3][c].im += hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[3][c].re -= hh[k].f[1][c].im;
#line 2987 "dwf.nw"
                k = 5;
#line 230 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[2][c].im += hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[2][c].re -= hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[3][c].im -= hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[3][c].re += hh[k].f[1][c].im;
#line 2988 "dwf.nw"
            }
#line 2923 "dwf.nw"
        }
#line 2903 "dwf.nw"
    }
#line 2655 "dwf.nw"
    /* The boundary sites consume nb->rcv_buf[], so wait on qmp_cr first. */
#line 3417 "dwf.nw"
    QMP_wait(nb->qmp_cr);
#line 2656 "dwf.nw"
#line 2907 "dwf.nw"
    for (i = 0; i < nb->boundary_size; i++) {
        /* bit d set: direction d is taken from nb->rcv_buf[d], not psi */
        int m = nb->boundary[i].mask;

        xyzt = nb->boundary[i].index;
        xyzt5 = xyzt * S_4;
#line 3352 "dwf.nw"
        rx5 = &chi[xyzt5];
#line 2913 "dwf.nw"
#line 3276 "dwf.nw"
        Uup = &U[nb->site[xyzt].Uup];
        for (d = 0; d < 4; d++, Uup++) {
            Udown = &U[nb->site[xyzt].Udown[d]];
            for (c1 = 0; c1 < 3; c1++) {
                for (c2 = 0; c2 < 3; c2++) {
                    V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re);
                    V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im);
                    /* conjugate down-link */
                    V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re);
                    V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im);
                }
            }
        }
#line 2914 "dwf.nw"
#line 2927 "dwf.nw"
        for (s = 0; s < S_4; s++) {
#line 3292 "dwf.nw"
            for (d = 0; d < 8; d++) {
                ps[d] = p5[d] + s;
            }
            /* Project only the directions whose mask bit is clear; the
             * others leave gg[d] untouched and are read from rcv_buf below. */
#line 2950 "dwf.nw"
            for (c = 0; c < 3; c++) {
                if ((m & 0x01) == 0) {
                    k=0; f=&psi[ps[0]]; g=&gg[0];
#line 129 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re - f->f[3][c].im;
                    g->f[0][c].im = f->f[0][c].im + f->f[3][c].re;
                    g->f[1][c].re = f->f[1][c].re - f->f[2][c].im;
                    g->f[1][c].im = f->f[1][c].im + f->f[2][c].re;
#line 2953 "dwf.nw"
                }
                if ((m & 0x02) == 0) {
                    k=1; f=&psi[ps[1]]; g=&gg[1];
#line 142 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re + f->f[3][c].im;
                    g->f[0][c].im = f->f[0][c].im - f->f[3][c].re;
                    g->f[1][c].re = f->f[1][c].re + f->f[2][c].im;
                    g->f[1][c].im = f->f[1][c].im - f->f[2][c].re;
#line 2956 "dwf.nw"
                }
                if ((m & 0x04) == 0) {
                    k=2; f=&psi[ps[2]]; g=&gg[2];
#line 170 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re - f->f[3][c].re;
                    g->f[0][c].im = f->f[0][c].im - f->f[3][c].im;
                    g->f[1][c].re = f->f[1][c].re + f->f[2][c].re;
                    g->f[1][c].im = f->f[1][c].im + f->f[2][c].im;
#line 2959 "dwf.nw"
                }
                if ((m & 0x08) == 0) {
                    k=3; f=&psi[ps[3]]; g=&gg[3];
#line 183 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re + f->f[3][c].re;
                    g->f[0][c].im = f->f[0][c].im + f->f[3][c].im;
                    g->f[1][c].re = f->f[1][c].re - f->f[2][c].re;
                    g->f[1][c].im = f->f[1][c].im - f->f[2][c].im;
#line 2962 "dwf.nw"
                }
                if ((m & 0x10) == 0) {
                    k=4; f=&psi[ps[4]]; g=&gg[4];
#line 211 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re - f->f[2][c].im;
                    g->f[0][c].im = f->f[0][c].im + f->f[2][c].re;
                    g->f[1][c].re = f->f[1][c].re + f->f[3][c].im;
                    g->f[1][c].im = f->f[1][c].im - f->f[3][c].re;
#line 2965 "dwf.nw"
                }
                if ((m & 0x20) == 0) {
                    k=5; f=&psi[ps[5]]; g=&gg[5];
#line 224 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re + f->f[2][c].im;
                    g->f[0][c].im = f->f[0][c].im - f->f[2][c].re;
                    g->f[1][c].re = f->f[1][c].re - f->f[3][c].im;
                    g->f[1][c].im = f->f[1][c].im + f->f[3][c].re;
#line 2968 "dwf.nw"
                }
                if ((m & 0x40) == 0) {
                    k=6; f=&psi[ps[6]]; g=&gg[6];
#line 252 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re + f->f[2][c].re;
                    g->f[0][c].im = f->f[0][c].im + f->f[2][c].im;
                    g->f[1][c].re = f->f[1][c].re + f->f[3][c].re;
                    g->f[1][c].im = f->f[1][c].im + f->f[3][c].im;
#line 2971 "dwf.nw"
                }
                if ((m & 0x80) == 0) {
                    k=7; f=&psi[ps[7]]; g=&gg[7];
#line 265 "dwf.nw"
                    g->f[0][c].re = f->f[0][c].re - f->f[2][c].re;
                    g->f[0][c].im = f->f[0][c].im - f->f[2][c].im;
                    g->f[1][c].re = f->f[1][c].re - f->f[3][c].re;
                    g->f[1][c].im = f->f[1][c].im - f->f[3][c].im;
#line 2974 "dwf.nw"
                }
            }
#line 2929 "dwf.nw"
            /* hh[d] = V[d] * (received half spinor or local projection). */
#line 3001 "dwf.nw"
            for (d = 0; d < 8; d++) {
                vHalfFermion * __restrict__ h = &hh[d];
                vSU3 *u = &V[d];
                g = (m & (1 << d))? &nb->rcv_buf[d][ps[d]]: &gg[d];
#line 3009 "dwf.nw"
                for (c = 0; c < 3; c++) {
                    h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im
                                 +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im
                                 +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im;
                    h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im
                                 +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im
                                 +u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im;
                    h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im
                                 +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im
                                 +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im;
                    h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im
                                 +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im
                                 +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im;
                }
#line 3006 "dwf.nw"
            }
#line 2930 "dwf.nw"
            /* Accumulate from hh[], never gg[]: for masked directions gg[d]
             * was not filled for this site, and the received data exists
             * only in hh[d]. */
#line 2978 "dwf.nw"
            rs = &rx5[s];
            for (c = 0; c < 3; c++) {
                k = 6;
#line 258 "dwf.nw"
                qs->f[0][c].re = hh[k].f[0][c].re; qs->f[2][c].re = hh[k].f[0][c].re;
                qs->f[0][c].im = hh[k].f[0][c].im; qs->f[2][c].im = hh[k].f[0][c].im;
                qs->f[1][c].re = hh[k].f[1][c].re; qs->f[3][c].re = hh[k].f[1][c].re;
                qs->f[1][c].im = hh[k].f[1][c].im; qs->f[3][c].im = hh[k].f[1][c].im;
#line 2981 "dwf.nw"
                k = 7;
#line 271 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[2][c].re -= hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[2][c].im -= hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[3][c].re -= hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[3][c].im -= hh[k].f[1][c].im;
#line 2982 "dwf.nw"
                k = 2;
#line 176 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[3][c].re -= hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[3][c].im -= hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[2][c].re += hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[2][c].im += hh[k].f[1][c].im;
#line 2983 "dwf.nw"
                k = 3;
#line 189 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[3][c].re += hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[3][c].im += hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[2][c].re -= hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[2][c].im -= hh[k].f[1][c].im;
#line 2984 "dwf.nw"
                k = 0;
#line 135 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[3][c].im -= hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[3][c].re += hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[2][c].im -= hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[2][c].re += hh[k].f[1][c].im;
#line 2985 "dwf.nw"
                k = 1;
#line 148 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[3][c].im += hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[3][c].re -= hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[2][c].im += hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[2][c].re -= hh[k].f[1][c].im;
#line 2986 "dwf.nw"
                k = 4;
#line 217 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[2][c].im -= hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[2][c].re += hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[3][c].im += hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[3][c].re -= hh[k].f[1][c].im;
#line 2987 "dwf.nw"
                k = 5;
#line 230 "dwf.nw"
                qs->f[0][c].re += hh[k].f[0][c].re; qs->f[2][c].im += hh[k].f[0][c].re;
                qs->f[0][c].im += hh[k].f[0][c].im; qs->f[2][c].re -= hh[k].f[0][c].im;
                qs->f[1][c].re += hh[k].f[1][c].re; qs->f[3][c].im -= hh[k].f[1][c].re;
                qs->f[1][c].im += hh[k].f[1][c].im; qs->f[3][c].re += hh[k].f[1][c].im;
#line 2988 "dwf.nw"
            }
#line 2931 "dwf.nw"
        }
#line 2915 "dwf.nw"
    }
#line 2657 "dwf.nw"
#line 2771 "dwf.nw"
#undef qs
#undef qx5
#line 2658 "dwf.nw"
}
#line 2666 "dwf.nw" static void compute_1Sxy(vFermion *chi, const vFermion *eta, const vFermion *psi, struct neighbor *nb) { #line 3332 "dwf.nw" int i, xyzt5, s, c; vFermion * __restrict__ rx5, * __restrict__ rs; #line 2673 "dwf.nw" #line 3337 "dwf.nw" int xyzt, k, d; const vFermion *f; vHalfFermion *g; vHalfFermion gg[8], hh[8]; vSU3 V[8]; int ps[8], p5[8]; #line 3345 "dwf.nw" const SU3 *Uup, *Udown; int c1, c2; #line 2675 "dwf.nw" #line 2767 "dwf.nw" #define qx5 rx5 #define qs rs #line 2676 "dwf.nw" #line 3413 "dwf.nw" QMP_start(nb->qmp_cr); #line 2677 "dwf.nw" #line 3435 "dwf.nw" if (sending) { int i; /* This is QMP_wait_vector(nb->qmp_sv, nb->Ns); */ for (i = sending->Ns; i--;) QMP_wait(sending->qmp_sv[i]); sending = 0; } #line 2678 "dwf.nw" #line 2799 "dwf.nw" { int k, i, s, c, *src; const vFermion *f; vHalfFermion *g; k = 0; #line 2825 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 2829 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2805 "dwf.nw" k = 1;
#line 2815 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 2819 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2806 "dwf.nw" k = 2; #line 2845 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 2849 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2807 "dwf.nw" k = 3; #line 2835 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 2839 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2808 "dwf.nw" k = 4; #line 2865 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 2869 "dwf.nw" } } } #line 3425 "dwf.nw" 
if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2809 "dwf.nw" k = 5; #line 2855 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 2859 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2810 "dwf.nw" k = 6; #line 2885 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 2889 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2811 "dwf.nw" k = 7; #line 2875 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 2879 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2812 "dwf.nw" } #line 2679 "dwf.nw" #line 3027 "dwf.nw" for (i = 0; i < nb->inside_size; i++) { const vFermion *ex5, *es; xyzt = nb->inside[i]; xyzt5 = xyzt * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 3033 "dwf.nw" ex5 = &eta[xyzt5]; #line 3276 "dwf.nw" Uup = &U[nb->site[xyzt].Uup]; for (d = 0; d < 4; d++, Uup++) { Udown = 
&U[nb->site[xyzt].Udown[d]]; for (c1 = 0; c1 < 3; c1++) { for (c2 = 0; c2 < 3; c2++) { V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re); V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im); /* conjugate down-link */ V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re); V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im); } } } #line 3035 "dwf.nw" #line 3054 "dwf.nw" for (s = 0; s < S_4; s++) { #line 3292 "dwf.nw" for (d = 0; d < 8; d++) { ps[d] = p5[d] + s; } #line 3071 "dwf.nw" for (c = 0; c < 3; c++) { k=0; f=&psi[ps[0]]; g=&gg[0]; #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 3073 "dwf.nw" k=1; f=&psi[ps[1]]; g=&gg[1]; #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 3074 "dwf.nw" k=2; f=&psi[ps[2]]; g=&gg[2]; #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 3075 "dwf.nw" k=3; f=&psi[ps[3]]; g=&gg[3]; #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 3076 "dwf.nw" k=4; f=&psi[ps[4]]; g=&gg[4]; #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 3077 "dwf.nw" k=5; f=&psi[ps[5]]; g=&gg[5]; #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = 
f->f[1][c].im - f->f[3][c].re; #line 3078 "dwf.nw" k=6; f=&psi[ps[6]]; g=&gg[6]; #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 3079 "dwf.nw" k=7; f=&psi[ps[7]]; g=&gg[7]; #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 3080 "dwf.nw" } #line 3056 "dwf.nw" #line 2992 "dwf.nw" for (d = 0; d < 8; d++) { vHalfFermion * __restrict__ h = &hh[d]; vSU3 *u = &V[d]; g = &gg[d]; #line 3009 "dwf.nw" for (c = 0; c < 3; c++) { h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im; h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im +u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im; h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im; h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im; } #line 2997 "dwf.nw" } #line 3057 "dwf.nw" #line 3114 "dwf.nw" rs = &rx5[s]; es = &ex5[s]; for (c = 0; c < 3; c++) { k = 7; #line 258 "dwf.nw" qs->f[0][c].re = gg[k].f[0][c].re; qs->f[2][c].re = gg[k].f[0][c].re; qs->f[0][c].im = gg[k].f[0][c].im; qs->f[2][c].im = gg[k].f[0][c].im; qs->f[1][c].re = gg[k].f[1][c].re; qs->f[3][c].re = gg[k].f[1][c].re; qs->f[1][c].im = gg[k].f[1][c].im; qs->f[3][c].im = gg[k].f[1][c].im; #line 3118 "dwf.nw" k = 6; #line 271 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; 
qs->f[2][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].im -= gg[k].f[1][c].im; #line 3119 "dwf.nw" k = 3; #line 176 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im += gg[k].f[1][c].im; #line 3120 "dwf.nw" k = 2; #line 189 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im -= gg[k].f[1][c].im; #line 3121 "dwf.nw" k = 0; #line 148 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re -= gg[k].f[1][c].im; #line 3122 "dwf.nw" k = 1; #line 135 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re += gg[k].f[1][c].im; #line 3123 "dwf.nw" k = 4; #line 230 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re += gg[k].f[1][c].im; #line 3124 "dwf.nw" k = 5; #line 217 "dwf.nw" qs->f[0][c].re += 
gg[k].f[0][c].re; qs->f[2][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re -= gg[k].f[1][c].im; #line 3125 "dwf.nw" #line 3130 "dwf.nw" rs->f[0][c].re = es->f[0][c].re - rs->f[0][c].re; rs->f[0][c].im = es->f[0][c].im - rs->f[0][c].im; rs->f[1][c].re = es->f[1][c].re - rs->f[1][c].re; rs->f[1][c].im = es->f[1][c].im - rs->f[1][c].im; rs->f[2][c].re = es->f[2][c].re - rs->f[2][c].re; rs->f[2][c].im = es->f[2][c].im - rs->f[2][c].im; rs->f[3][c].re = es->f[3][c].re - rs->f[3][c].re; rs->f[3][c].im = es->f[3][c].im - rs->f[3][c].im; #line 3126 "dwf.nw" } #line 3058 "dwf.nw" } #line 3036 "dwf.nw" } #line 2680 "dwf.nw" #line 3417 "dwf.nw" QMP_wait(nb->qmp_cr); #line 2681 "dwf.nw" #line 3040 "dwf.nw" for (i = 0; i < nb->boundary_size; i++) { const vFermion *ex5, *es; int m = nb->boundary[i].mask; xyzt = nb->boundary[i].index; xyzt5 = xyzt * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 3047 "dwf.nw" ex5 = &eta[xyzt5]; #line 3276 "dwf.nw" Uup = &U[nb->site[xyzt].Uup]; for (d = 0; d < 4; d++, Uup++) { Udown = &U[nb->site[xyzt].Udown[d]]; for (c1 = 0; c1 < 3; c1++) { for (c2 = 0; c2 < 3; c2++) { V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re); V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im); /* conjugate down-link */ V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re); V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im); } } } #line 3049 "dwf.nw" #line 3062 "dwf.nw" for (s = 0; s < S_4; s++) { #line 3292 "dwf.nw" for (d = 0; d < 8; d++) { ps[d] = p5[d] + s; } #line 3085 "dwf.nw" for (c = 0; c < 3; c++) { if ((m & 0x01) == 0) { k=0; f=&psi[ps[0]]; g=&gg[0]; #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 3088 "dwf.nw" } if ((m & 0x02) == 0) 
{ k=1; f=&psi[ps[1]]; g=&gg[1]; #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 3091 "dwf.nw" } if ((m & 0x04) == 0) { k=2; f=&psi[ps[2]]; g=&gg[2]; #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 3094 "dwf.nw" } if ((m & 0x08) == 0) { k=3; f=&psi[ps[3]]; g=&gg[3]; #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 3097 "dwf.nw" } if ((m & 0x10) == 0) { k=4; f=&psi[ps[4]]; g=&gg[4]; #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 3100 "dwf.nw" } if ((m & 0x20) == 0) { k=5; f=&psi[ps[5]]; g=&gg[5]; #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 3103 "dwf.nw" } if ((m & 0x40) == 0) { k=6; f=&psi[ps[6]]; g=&gg[6]; #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 3106 "dwf.nw" } if ((m & 0x80) == 0) { k=7; f=&psi[ps[7]]; g=&gg[7]; #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 3109 "dwf.nw" } } #line 3064 "dwf.nw" #line 3001 "dwf.nw" for 
(d = 0; d < 8; d++) { vHalfFermion * __restrict__ h = &hh[d]; vSU3 *u = &V[d]; g = (m & (1 << d))? &nb->rcv_buf[d][ps[d]]: &gg[d]; #line 3009 "dwf.nw" for (c = 0; c < 3; c++) { h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im; h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im +u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im; h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im; h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im; } #line 3006 "dwf.nw" } #line 3065 "dwf.nw" #line 3114 "dwf.nw" rs = &rx5[s]; es = &ex5[s]; for (c = 0; c < 3; c++) { k = 7; #line 258 "dwf.nw" qs->f[0][c].re = gg[k].f[0][c].re; qs->f[2][c].re = gg[k].f[0][c].re; qs->f[0][c].im = gg[k].f[0][c].im; qs->f[2][c].im = gg[k].f[0][c].im; qs->f[1][c].re = gg[k].f[1][c].re; qs->f[3][c].re = gg[k].f[1][c].re; qs->f[1][c].im = gg[k].f[1][c].im; qs->f[3][c].im = gg[k].f[1][c].im; #line 3118 "dwf.nw" k = 6; #line 271 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].im -= gg[k].f[1][c].im; #line 3119 "dwf.nw" k = 3; #line 176 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im += 
gg[k].f[1][c].im; #line 3120 "dwf.nw" k = 2; #line 189 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im -= gg[k].f[1][c].im; #line 3121 "dwf.nw" k = 0; #line 148 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re -= gg[k].f[1][c].im; #line 3122 "dwf.nw" k = 1; #line 135 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re += gg[k].f[1][c].im; #line 3123 "dwf.nw" k = 4; #line 230 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re += gg[k].f[1][c].im; #line 3124 "dwf.nw" k = 5; #line 217 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re -= gg[k].f[1][c].im; #line 3125 "dwf.nw" #line 3130 "dwf.nw" rs->f[0][c].re = es->f[0][c].re - rs->f[0][c].re; rs->f[0][c].im = es->f[0][c].im - rs->f[0][c].im; rs->f[1][c].re = es->f[1][c].re - rs->f[1][c].re; rs->f[1][c].im = es->f[1][c].im - rs->f[1][c].im; rs->f[2][c].re = es->f[2][c].re - rs->f[2][c].re; rs->f[2][c].im = 
es->f[2][c].im - rs->f[2][c].im; rs->f[3][c].re = es->f[3][c].re - rs->f[3][c].re; rs->f[3][c].im = es->f[3][c].im - rs->f[3][c].im; #line 3126 "dwf.nw" } #line 3066 "dwf.nw" } #line 3050 "dwf.nw" } #line 2682 "dwf.nw" #line 2771 "dwf.nw" #undef qs #undef qx5 #line 2683 "dwf.nw" } #line 2690 "dwf.nw" static void compute_Qxx1Qxy(vFermion *chi, const vFermion *psi, struct neighbor *nb) { #line 3332 "dwf.nw" int i, xyzt5, s, c; vFermion * __restrict__ rx5, * __restrict__ rs; #line 2696 "dwf.nw" #line 3337 "dwf.nw" int xyzt, k, d; const vFermion *f; vHalfFermion *g; vHalfFermion gg[8], hh[8]; vSU3 V[8]; int ps[8], p5[8]; #line 3345 "dwf.nw" const SU3 *Uup, *Udown; int c1, c2; #line 2697 "dwf.nw" #line 3359 "dwf.nw" vReal fx; vHalfFermion zV; vcomplex zn, z1, z2, z3; complex zX[2][3]; vHalfFermion xOut; vHalfFermion yOut; #line 2699 "dwf.nw" #line 2767 "dwf.nw" #define qx5 rx5 #define qs rs #line 2700 "dwf.nw" #line 3413 "dwf.nw" QMP_start(nb->qmp_cr); #line 2701 "dwf.nw" #line 3435 "dwf.nw" if (sending) { int i; /* This is QMP_wait_vector(nb->qmp_sv, nb->Ns); */ for (i = sending->Ns; i--;) QMP_wait(sending->qmp_sv[i]); sending = 0; } #line 2702 "dwf.nw" #line 2782 "dwf.nw" { int k, i, s, c, *src; const vFermion *f; vHalfFermion *g; k = 0; #line 2815 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 2819 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2788 "dwf.nw" k = 1; #line 2825 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re 
+ f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 2829 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2789 "dwf.nw" k = 2; #line 2835 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 2839 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2790 "dwf.nw" k = 3; #line 2845 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 2849 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2791 "dwf.nw" k = 4; #line 2855 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 2859 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2792 "dwf.nw" k = 5; #line 2865 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = 
&psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 2869 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2793 "dwf.nw" k = 6; #line 2875 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 2879 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2794 "dwf.nw" k = 7; #line 2885 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 2889 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2795 "dwf.nw" } #line 2703 "dwf.nw" #line 3142 "dwf.nw" for (i = 0; i < nb->inside_size; i++) { xyzt = nb->inside[i]; xyzt5 = xyzt * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 3146 "dwf.nw" #line 3276 "dwf.nw" Uup = &U[nb->site[xyzt].Uup]; for (d = 0; d < 4; d++, Uup++) { Udown = &U[nb->site[xyzt].Udown[d]]; for (c1 = 0; c1 < 3; c1++) { for (c2 = 0; c2 < 3; c2++) { V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re); V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im); /* conjugate down-link */ V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re); 
V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im); } } } #line 3147 "dwf.nw" #line 2919 "dwf.nw" for (s = 0; s < S_4; s++) { #line 3292 "dwf.nw" for (d = 0; d < 8; d++) { ps[d] = p5[d] + s; } #line 2936 "dwf.nw" for (c = 0; c < 3; c++) { k=0; f=&psi[ps[0]]; g=&gg[0]; #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 2938 "dwf.nw" k=1; f=&psi[ps[1]]; g=&gg[1]; #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 2939 "dwf.nw" k=2; f=&psi[ps[2]]; g=&gg[2]; #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 2940 "dwf.nw" k=3; f=&psi[ps[3]]; g=&gg[3]; #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 2941 "dwf.nw" k=4; f=&psi[ps[4]]; g=&gg[4]; #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 2942 "dwf.nw" k=5; f=&psi[ps[5]]; g=&gg[5]; #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 2943 "dwf.nw" k=6; f=&psi[ps[6]]; g=&gg[6]; #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = 
f->f[1][c].im + f->f[3][c].im; #line 2944 "dwf.nw" k=7; f=&psi[ps[7]]; g=&gg[7]; #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 2945 "dwf.nw" } #line 2921 "dwf.nw" #line 2992 "dwf.nw" for (d = 0; d < 8; d++) { vHalfFermion * __restrict__ h = &hh[d]; vSU3 *u = &V[d]; g = &gg[d]; #line 3009 "dwf.nw" for (c = 0; c < 3; c++) { h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im; h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im +u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im; h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im; h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im; } #line 2997 "dwf.nw" } #line 2922 "dwf.nw" #line 2978 "dwf.nw" rs = &rx5[s]; for (c = 0; c < 3; c++) { k = 6; #line 258 "dwf.nw" qs->f[0][c].re = gg[k].f[0][c].re; qs->f[2][c].re = gg[k].f[0][c].re; qs->f[0][c].im = gg[k].f[0][c].im; qs->f[2][c].im = gg[k].f[0][c].im; qs->f[1][c].re = gg[k].f[1][c].re; qs->f[3][c].re = gg[k].f[1][c].re; qs->f[1][c].im = gg[k].f[1][c].im; qs->f[3][c].im = gg[k].f[1][c].im; #line 2981 "dwf.nw" k = 7; #line 271 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].im -= gg[k].f[1][c].im; #line 2982 "dwf.nw" k = 
2; #line 176 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im += gg[k].f[1][c].im; #line 2983 "dwf.nw" k = 3; #line 189 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im -= gg[k].f[1][c].im; #line 2984 "dwf.nw" k = 0; #line 135 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re += gg[k].f[1][c].im; #line 2985 "dwf.nw" k = 1; #line 148 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re -= gg[k].f[1][c].im; #line 2986 "dwf.nw" k = 4; #line 217 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re -= gg[k].f[1][c].im; #line 2987 "dwf.nw" k = 5; #line 230 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re += gg[k].f[1][c].im; #line 2988 "dwf.nw" 
} #line 2923 "dwf.nw" } #line 3148 "dwf.nw" #line 2269 "dwf.nw" vhfzero(&zV); fx = ab_LA; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2272 "dwf.nw" for (s = 0; s < S_4_1; s++, fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2276 "dwf.nw" } rs = &rx5[S_4_1]; QSETUP(S_4_1) vput_3(&fx, c0); #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2281 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2284 "dwf.nw" zn.re = qs->f[0][c].re; zn.im = qs->f[0][c].im; vput_3(&zn.re, zX[0][c].re); vput_3(&zn.im, zX[0][c].im); rs->f[0][c].re = zn.re; rs->f[0][c].im = zn.im; zn.re = qs->f[1][c].re; zn.im = qs->f[1][c].im; vput_3(&zn.re, zX[1][c].re); vput_3(&zn.im, zX[1][c].im); rs->f[1][c].re = zn.re; rs->f[1][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2417 "dwf.nw" for (s = S_4; s--;) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2463 "dwf.nw" zn.re = rs->f[0][c].re; zn.im = rs->f[0][c].im; z1.re = shift_down1(zn.re, xOut.f[0][c].re); z1.im = shift_down1(zn.im, xOut.f[0][c].im); z2.re = shift_down2(zn.re, xOut.f[0][c].re); z2.im = shift_down2(zn.im, xOut.f[0][c].im); z3.re = shift_down3(zn.re, xOut.f[0][c].re); z3.im = shift_down3(zn.im, xOut.f[0][c].im); rs->f[0][c].re = va0*zn.re + va1*z1.re + 
va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[0][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[0][c].re; yOut.f[0][c].im = rs->f[0][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2421 "dwf.nw" #line 2480 "dwf.nw" zn.re = rs->f[1][c].re; zn.im = rs->f[1][c].im; z1.re = shift_down1(zn.re, xOut.f[1][c].re); z1.im = shift_down1(zn.im, xOut.f[1][c].im); z2.re = shift_down2(zn.re, xOut.f[1][c].re); z2.im = shift_down2(zn.im, xOut.f[1][c].im); z3.re = shift_down3(zn.re, xOut.f[1][c].re); z3.im = shift_down3(zn.im, xOut.f[1][c].im); rs->f[1][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[1][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[1][c].re; yOut.f[1][c].im = rs->f[1][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2422 "dwf.nw" } } #line 2377 "dwf.nw" vhfzero(&zV); fx = ab_LB; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2380 "dwf.nw" for (s = S_4; --s; fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2384 "dwf.nw" } rs = &rx5[0]; QSETUP(0) vput_0(&fx, c0); #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2389 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2391 "dwf.nw" zn.re = 
qs->f[2][c].re; zn.im = qs->f[2][c].im; vput_0(&zn.re, zX[0][c].re); vput_0(&zn.im, zX[0][c].im); rs->f[2][c].re = zn.re; rs->f[2][c].im = zn.im; zn.re = qs->f[3][c].re; zn.im = qs->f[3][c].im; vput_0(&zn.re, zX[1][c].re); vput_0(&zn.im, zX[1][c].im); rs->f[3][c].re = zn.re; rs->f[3][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2447 "dwf.nw" for (s = 0; s < S_4; s++) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2565 "dwf.nw" zn.re = rs->f[2][c].re; zn.im = rs->f[2][c].im; z1.re = shift_up1(xOut.f[0][c].re, zn.re); z1.im = shift_up1(xOut.f[0][c].im, zn.im); z2.re = shift_up2(xOut.f[0][c].re, zn.re); z2.im = shift_up2(xOut.f[0][c].im, zn.im); z3.re = shift_up3(xOut.f[0][c].re, zn.re); z3.im = shift_up3(xOut.f[0][c].im, zn.im); rs->f[2][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[2][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[2][c].re; yOut.f[0][c].im = rs->f[2][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2451 "dwf.nw" #line 2582 "dwf.nw" zn.re = rs->f[3][c].re; zn.im = rs->f[3][c].im; z1.re = shift_up1(xOut.f[1][c].re, zn.re); z1.im = shift_up1(xOut.f[1][c].im, zn.im); z2.re = shift_up2(xOut.f[1][c].re, zn.re); z2.im = shift_up2(xOut.f[1][c].im, zn.im); z3.re = shift_up3(xOut.f[1][c].re, zn.re); z3.im = shift_up3(xOut.f[1][c].im, zn.im); rs->f[3][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[3][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[3][c].re; yOut.f[1][c].im = rs->f[3][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2452 "dwf.nw" } } #line 3149 "dwf.nw" } #line 2704 "dwf.nw" #line 3417 "dwf.nw" QMP_wait(nb->qmp_cr); #line 2705 "dwf.nw" #line 3153 "dwf.nw" for (i = 0; i < nb->boundary_size; i++) { int m = nb->boundary[i].mask; xyzt = nb->boundary[i].index; xyzt5 = 
xyzt * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 3159 "dwf.nw" #line 3276 "dwf.nw" Uup = &U[nb->site[xyzt].Uup]; for (d = 0; d < 4; d++, Uup++) { Udown = &U[nb->site[xyzt].Udown[d]]; for (c1 = 0; c1 < 3; c1++) { for (c2 = 0; c2 < 3; c2++) { V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re); V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im); /* conjugate down-link */ V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re); V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im); } } } #line 3160 "dwf.nw" #line 2927 "dwf.nw" for (s = 0; s < S_4; s++) { #line 3292 "dwf.nw" for (d = 0; d < 8; d++) { ps[d] = p5[d] + s; } #line 2950 "dwf.nw" for (c = 0; c < 3; c++) { if ((m & 0x01) == 0) { k=0; f=&psi[ps[0]]; g=&gg[0]; #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 2953 "dwf.nw" } if ((m & 0x02) == 0) { k=1; f=&psi[ps[1]]; g=&gg[1]; #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 2956 "dwf.nw" } if ((m & 0x04) == 0) { k=2; f=&psi[ps[2]]; g=&gg[2]; #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 2959 "dwf.nw" } if ((m & 0x08) == 0) { k=3; f=&psi[ps[3]]; g=&gg[3]; #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 2962 "dwf.nw" } if ((m & 0x10) == 0) { k=4; f=&psi[ps[4]]; g=&gg[4]; #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + 
f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 2965 "dwf.nw" } if ((m & 0x20) == 0) { k=5; f=&psi[ps[5]]; g=&gg[5]; #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 2968 "dwf.nw" } if ((m & 0x40) == 0) { k=6; f=&psi[ps[6]]; g=&gg[6]; #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 2971 "dwf.nw" } if ((m & 0x80) == 0) { k=7; f=&psi[ps[7]]; g=&gg[7]; #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 2974 "dwf.nw" } } #line 2929 "dwf.nw" #line 3001 "dwf.nw" for (d = 0; d < 8; d++) { vHalfFermion * __restrict__ h = &hh[d]; vSU3 *u = &V[d]; g = (m & (1 << d))? 
&nb->rcv_buf[d][ps[d]]: &gg[d]; #line 3009 "dwf.nw" for (c = 0; c < 3; c++) { h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im; h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im +u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im; h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im; h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im; } #line 3006 "dwf.nw" } #line 2930 "dwf.nw" #line 2978 "dwf.nw" rs = &rx5[s]; for (c = 0; c < 3; c++) { k = 6; #line 258 "dwf.nw" qs->f[0][c].re = gg[k].f[0][c].re; qs->f[2][c].re = gg[k].f[0][c].re; qs->f[0][c].im = gg[k].f[0][c].im; qs->f[2][c].im = gg[k].f[0][c].im; qs->f[1][c].re = gg[k].f[1][c].re; qs->f[3][c].re = gg[k].f[1][c].re; qs->f[1][c].im = gg[k].f[1][c].im; qs->f[3][c].im = gg[k].f[1][c].im; #line 2981 "dwf.nw" k = 7; #line 271 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].im -= gg[k].f[1][c].im; #line 2982 "dwf.nw" k = 2; #line 176 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im += gg[k].f[1][c].im; #line 2983 "dwf.nw" k = 3; #line 189 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re += 
gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im -= gg[k].f[1][c].im; #line 2984 "dwf.nw" k = 0; #line 135 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re += gg[k].f[1][c].im; #line 2985 "dwf.nw" k = 1; #line 148 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re -= gg[k].f[1][c].im; #line 2986 "dwf.nw" k = 4; #line 217 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re -= gg[k].f[1][c].im; #line 2987 "dwf.nw" k = 5; #line 230 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re += gg[k].f[1][c].im; #line 2988 "dwf.nw" } #line 2931 "dwf.nw" } #line 3161 "dwf.nw" #line 2269 "dwf.nw" vhfzero(&zV); fx = ab_LA; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2272 "dwf.nw" for (s = 0; s < S_4_1; s++, fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += 
fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2276 "dwf.nw" } rs = &rx5[S_4_1]; QSETUP(S_4_1) vput_3(&fx, c0); #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2281 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2284 "dwf.nw" zn.re = qs->f[0][c].re; zn.im = qs->f[0][c].im; vput_3(&zn.re, zX[0][c].re); vput_3(&zn.im, zX[0][c].im); rs->f[0][c].re = zn.re; rs->f[0][c].im = zn.im; zn.re = qs->f[1][c].re; zn.im = qs->f[1][c].im; vput_3(&zn.re, zX[1][c].re); vput_3(&zn.im, zX[1][c].im); rs->f[1][c].re = zn.re; rs->f[1][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2417 "dwf.nw" for (s = S_4; s--;) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2463 "dwf.nw" zn.re = rs->f[0][c].re; zn.im = rs->f[0][c].im; z1.re = shift_down1(zn.re, xOut.f[0][c].re); z1.im = shift_down1(zn.im, xOut.f[0][c].im); z2.re = shift_down2(zn.re, xOut.f[0][c].re); z2.im = shift_down2(zn.im, xOut.f[0][c].im); z3.re = shift_down3(zn.re, xOut.f[0][c].re); z3.im = shift_down3(zn.im, xOut.f[0][c].im); rs->f[0][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[0][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[0][c].re; yOut.f[0][c].im = rs->f[0][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2421 "dwf.nw" #line 2480 "dwf.nw" zn.re = rs->f[1][c].re; zn.im = rs->f[1][c].im; z1.re = shift_down1(zn.re, xOut.f[1][c].re); z1.im = shift_down1(zn.im, 
xOut.f[1][c].im); z2.re = shift_down2(zn.re, xOut.f[1][c].re); z2.im = shift_down2(zn.im, xOut.f[1][c].im); z3.re = shift_down3(zn.re, xOut.f[1][c].re); z3.im = shift_down3(zn.im, xOut.f[1][c].im); rs->f[1][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[1][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[1][c].re; yOut.f[1][c].im = rs->f[1][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2422 "dwf.nw" } } #line 2377 "dwf.nw" vhfzero(&zV); fx = ab_LB; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2380 "dwf.nw" for (s = S_4; --s; fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2384 "dwf.nw" } rs = &rx5[0]; QSETUP(0) vput_0(&fx, c0); #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2389 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2391 "dwf.nw" zn.re = qs->f[2][c].re; zn.im = qs->f[2][c].im; vput_0(&zn.re, zX[0][c].re); vput_0(&zn.im, zX[0][c].im); rs->f[2][c].re = zn.re; rs->f[2][c].im = zn.im; zn.re = qs->f[3][c].re; zn.im = qs->f[3][c].im; vput_0(&zn.re, zX[1][c].re); vput_0(&zn.im, zX[1][c].im); rs->f[3][c].re = zn.re; rs->f[3][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2447 
"dwf.nw" for (s = 0; s < S_4; s++) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2565 "dwf.nw" zn.re = rs->f[2][c].re; zn.im = rs->f[2][c].im; z1.re = shift_up1(xOut.f[0][c].re, zn.re); z1.im = shift_up1(xOut.f[0][c].im, zn.im); z2.re = shift_up2(xOut.f[0][c].re, zn.re); z2.im = shift_up2(xOut.f[0][c].im, zn.im); z3.re = shift_up3(xOut.f[0][c].re, zn.re); z3.im = shift_up3(xOut.f[0][c].im, zn.im); rs->f[2][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[2][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[2][c].re; yOut.f[0][c].im = rs->f[2][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2451 "dwf.nw" #line 2582 "dwf.nw" zn.re = rs->f[3][c].re; zn.im = rs->f[3][c].im; z1.re = shift_up1(xOut.f[1][c].re, zn.re); z1.im = shift_up1(xOut.f[1][c].im, zn.im); z2.re = shift_up2(xOut.f[1][c].re, zn.re); z2.im = shift_up2(xOut.f[1][c].im, zn.im); z3.re = shift_up3(xOut.f[1][c].re, zn.re); z3.im = shift_up3(xOut.f[1][c].im, zn.im); rs->f[3][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[3][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[3][c].re; yOut.f[1][c].im = rs->f[3][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2452 "dwf.nw" } } #line 3162 "dwf.nw" } #line 2706 "dwf.nw" #line 2771 "dwf.nw" #undef qs #undef qx5 #line 2707 "dwf.nw" } #line 2714 "dwf.nw" /* compute_Sxx1Sxy: fills chi from psi using the neighbor tables in nb. Visible steps, in order: start nb->qmp_cr; wait for any sends still pending on the sending pointer declared outside this function; pack nb->snd_buf[0..7] from psi and start the sends enabled in nb->qmp_smask; process the sites listed in nb->inside; wait on nb->qmp_cr; process the sites listed in nb->boundary. Within this function qx5 and qs are macro aliases of rx5 and rs, so the 5-d sweeps update chi in place. NOTE(review): p5[] is read below (ps[d] = p5[d] + s) but no assignment to it is visible in this function, and hh[] is written by the link multiplication while the accumulation into qs reads gg[] - confirm both against dwf.nw. */ static void compute_Sxx1Sxy(vFermion *chi, const vFermion *psi, struct neighbor *nb) { #line 3332 "dwf.nw" int i, xyzt5, s, c; vFermion * __restrict__ rx5, * __restrict__ rs; #line 2720 "dwf.nw" #line 3337 "dwf.nw" int xyzt, k, d; const vFermion *f; vHalfFermion *g; vHalfFermion gg[8], hh[8]; vSU3 V[8]; int ps[8], p5[8]; #line 3345 "dwf.nw" const SU3 *Uup, *Udown; int c1, c2; #line 2721 "dwf.nw" #line 3359 "dwf.nw" vReal fx; vHalfFermion zV; vcomplex zn, z1, z2, z3; complex zX[2][3]; vHalfFermion xOut; vHalfFermion 
yOut; #line 2723 "dwf.nw" #line 2767 "dwf.nw" #define qx5 rx5 #define qs rs #line 2724 "dwf.nw" #line 3413 "dwf.nw" QMP_start(nb->qmp_cr); #line 2725 "dwf.nw" #line 3435 "dwf.nw" if (sending) { int i; /* This is QMP_wait_vector(nb->qmp_sv, nb->Ns); */ for (i = sending->Ns; i--;) QMP_wait(sending->qmp_sv[i]); sending = 0; } #line 2726 "dwf.nw" #line 2799 "dwf.nw" /* Pack one send buffer per direction k from psi through the nb->snd[k] index table; start nb->qmp_sh[k] when bit k of nb->qmp_smask is set. */ { int k, i, s, c, *src; const vFermion *f; vHalfFermion *g; k = 0; #line 2825 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 2829 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2805 "dwf.nw" k = 1; #line 2815 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 2819 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2806 "dwf.nw" k = 2; #line 2845 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 2849 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2807 "dwf.nw" 
k = 3; #line 2835 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 2839 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2808 "dwf.nw" k = 4; #line 2865 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 2869 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2809 "dwf.nw" k = 5; #line 2855 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 2859 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2810 "dwf.nw" k = 6; #line 2885 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 2889 "dwf.nw" } } } #line 3425 
"dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2811 "dwf.nw" k = 7; #line 2875 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 2879 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2812 "dwf.nw" } #line 2727 "dwf.nw" #line 3167 "dwf.nw" /* Inside sites: all eight neighbor half-fermions are projected directly from psi. */ for (i = 0; i < nb->inside_size; i++) { xyzt = nb->inside[i]; xyzt5 = xyzt * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 3171 "dwf.nw" #line 3276 "dwf.nw" Uup = &U[nb->site[xyzt].Uup]; for (d = 0; d < 4; d++, Uup++) { Udown = &U[nb->site[xyzt].Udown[d]]; for (c1 = 0; c1 < 3; c1++) { for (c2 = 0; c2 < 3; c2++) { V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re); V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im); /* conjugate down-link */ V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re); V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im); } } } #line 3172 "dwf.nw" #line 3191 "dwf.nw" for (s = 0; s < S_4; s++) { #line 3292 "dwf.nw" for (d = 0; d < 8; d++) { ps[d] = p5[d] + s; } #line 3071 "dwf.nw" for (c = 0; c < 3; c++) { k=0; f=&psi[ps[0]]; g=&gg[0]; #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 3073 "dwf.nw" k=1; f=&psi[ps[1]]; g=&gg[1]; #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 3074 "dwf.nw" k=2; f=&psi[ps[2]]; g=&gg[2]; #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + 
f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 3075 "dwf.nw" k=3; f=&psi[ps[3]]; g=&gg[3]; #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 3076 "dwf.nw" k=4; f=&psi[ps[4]]; g=&gg[4]; #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 3077 "dwf.nw" k=5; f=&psi[ps[5]]; g=&gg[5]; #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 3078 "dwf.nw" k=6; f=&psi[ps[6]]; g=&gg[6]; #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 3079 "dwf.nw" k=7; f=&psi[ps[7]]; g=&gg[7]; #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 3080 "dwf.nw" } #line 3193 "dwf.nw" #line 2992 "dwf.nw" for (d = 0; d < 8; d++) { vHalfFermion * __restrict__ h = &hh[d]; vSU3 *u = &V[d]; g = &gg[d]; #line 3009 "dwf.nw" for (c = 0; c < 3; c++) { h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im; h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im 
+u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im; h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im; h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im; } #line 2997 "dwf.nw" } #line 3194 "dwf.nw" #line 3207 "dwf.nw" rs = &rx5[s]; for (c = 0; c < 3; c++) { k = 7; #line 258 "dwf.nw" qs->f[0][c].re = gg[k].f[0][c].re; qs->f[2][c].re = gg[k].f[0][c].re; qs->f[0][c].im = gg[k].f[0][c].im; qs->f[2][c].im = gg[k].f[0][c].im; qs->f[1][c].re = gg[k].f[1][c].re; qs->f[3][c].re = gg[k].f[1][c].re; qs->f[1][c].im = gg[k].f[1][c].im; qs->f[3][c].im = gg[k].f[1][c].im; #line 3210 "dwf.nw" k = 6; #line 271 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].im -= gg[k].f[1][c].im; #line 3211 "dwf.nw" k = 3; #line 176 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im += gg[k].f[1][c].im; #line 3212 "dwf.nw" k = 2; #line 189 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im -= gg[k].f[1][c].im; #line 3213 "dwf.nw" k = 0; #line 148 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; 
qs->f[3][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re -= gg[k].f[1][c].im; #line 3214 "dwf.nw" k = 1; #line 135 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re += gg[k].f[1][c].im; #line 3215 "dwf.nw" k = 4; #line 230 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re += gg[k].f[1][c].im; #line 3216 "dwf.nw" k = 5; #line 217 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re -= gg[k].f[1][c].im; #line 3217 "dwf.nw" } #line 3195 "dwf.nw" } #line 3173 "dwf.nw" #line 2311 "dwf.nw" vhfzero(&zV); fx = ab_LA; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2314 "dwf.nw" for (s = 0; s < S_4_1; s++, fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2318 "dwf.nw" } rs = &rx5[S_4_1]; QSETUP(S_4_1) vput_3(&fx, c0); #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; 
Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2323 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2325 "dwf.nw" zn.re = qs->f[2][c].re; zn.im = qs->f[2][c].im; vput_3(&zn.re, zX[0][c].re); vput_3(&zn.im, zX[0][c].im); rs->f[2][c].re = zn.re; rs->f[2][c].im = zn.im; zn.re = qs->f[3][c].re; zn.im = qs->f[3][c].im; vput_3(&zn.re, zX[1][c].re); vput_3(&zn.im, zX[1][c].im); rs->f[3][c].re = zn.re; rs->f[3][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2427 "dwf.nw" for (s = S_4; s--;) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2497 "dwf.nw" zn.re = rs->f[2][c].re; zn.im = rs->f[2][c].im; z1.re = shift_down1(zn.re, xOut.f[0][c].re); z1.im = shift_down1(zn.im, xOut.f[0][c].im); z2.re = shift_down2(zn.re, xOut.f[0][c].re); z2.im = shift_down2(zn.im, xOut.f[0][c].im); z3.re = shift_down3(zn.re, xOut.f[0][c].re); z3.im = shift_down3(zn.im, xOut.f[0][c].im); rs->f[2][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[2][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[2][c].re; yOut.f[0][c].im = rs->f[2][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2431 "dwf.nw" #line 2514 "dwf.nw" zn.re = rs->f[3][c].re; zn.im = rs->f[3][c].im; z1.re = shift_down1(zn.re, xOut.f[1][c].re); z1.im = shift_down1(zn.im, xOut.f[1][c].im); z2.re = shift_down2(zn.re, xOut.f[1][c].re); z2.im = shift_down2(zn.im, xOut.f[1][c].im); z3.re = shift_down3(zn.re, xOut.f[1][c].re); z3.im = shift_down3(zn.im, xOut.f[1][c].im); rs->f[3][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[3][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = 
rs->f[3][c].re; yOut.f[1][c].im = rs->f[3][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2432 "dwf.nw" } } #line 2349 "dwf.nw" vhfzero(&zV); fx = ab_LB; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2352 "dwf.nw" for (s = S_4; --s; fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2356 "dwf.nw" } rs = &rx5[0]; QSETUP(0) vput_0(&fx, c0); #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2361 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2364 "dwf.nw" zn.re = qs->f[0][c].re; zn.im = qs->f[0][c].im; vput_0(&zn.re, zX[0][c].re); vput_0(&zn.im, zX[0][c].im); rs->f[0][c].re = zn.re; rs->f[0][c].im = zn.im; zn.re = qs->f[1][c].re; zn.im = qs->f[1][c].im; vput_0(&zn.re, zX[1][c].re); vput_0(&zn.im, zX[1][c].im); rs->f[1][c].re = zn.re; rs->f[1][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2437 "dwf.nw" for (s = 0; s < S_4; s++) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2531 "dwf.nw" zn.re = rs->f[0][c].re; zn.im = rs->f[0][c].im; z1.re = shift_up1(xOut.f[0][c].re, zn.re); z1.im = shift_up1(xOut.f[0][c].im, zn.im); z2.re = shift_up2(xOut.f[0][c].re, zn.re); z2.im = shift_up2(xOut.f[0][c].im, zn.im); z3.re = shift_up3(xOut.f[0][c].re, zn.re); z3.im = shift_up3(xOut.f[0][c].im, 
zn.im); rs->f[0][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[0][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[0][c].re; yOut.f[0][c].im = rs->f[0][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2441 "dwf.nw" #line 2548 "dwf.nw" zn.re = rs->f[1][c].re; zn.im = rs->f[1][c].im; z1.re = shift_up1(xOut.f[1][c].re, zn.re); z1.im = shift_up1(xOut.f[1][c].im, zn.im); z2.re = shift_up2(xOut.f[1][c].re, zn.re); z2.im = shift_up2(xOut.f[1][c].im, zn.im); z3.re = shift_up3(xOut.f[1][c].re, zn.re); z3.im = shift_up3(xOut.f[1][c].im, zn.im); rs->f[1][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[1][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[1][c].re; yOut.f[1][c].im = rs->f[1][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2442 "dwf.nw" } } #line 3174 "dwf.nw" } #line 2728 "dwf.nw" #line 3417 "dwf.nw" QMP_wait(nb->qmp_cr); #line 2729 "dwf.nw" #line 3178 "dwf.nw" /* Boundary sites: for each direction d with bit d set in m the half-fermion is taken from nb->rcv_buf[d]; otherwise it is projected from psi. */ for (i = 0; i < nb->boundary_size; i++) { int m = nb->boundary[i].mask; xyzt = nb->boundary[i].index; xyzt5 = xyzt * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 3184 "dwf.nw" #line 3276 "dwf.nw" Uup = &U[nb->site[xyzt].Uup]; for (d = 0; d < 4; d++, Uup++) { Udown = &U[nb->site[xyzt].Udown[d]]; for (c1 = 0; c1 < 3; c1++) { for (c2 = 0; c2 < 3; c2++) { V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re); V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im); /* conjugate down-link */ V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re); V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im); } } } #line 3185 "dwf.nw" #line 3199 "dwf.nw" for (s = 0; s < S_4; s++) { #line 3292 "dwf.nw" for (d = 0; d < 8; d++) { ps[d] = p5[d] + s; } #line 3085 "dwf.nw" for (c = 0; c < 3; c++) { if ((m & 0x01) == 0) { k=0; f=&psi[ps[0]]; g=&gg[0]; #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = 
f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 3088 "dwf.nw" } if ((m & 0x02) == 0) { k=1; f=&psi[ps[1]]; g=&gg[1]; #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 3091 "dwf.nw" } if ((m & 0x04) == 0) { k=2; f=&psi[ps[2]]; g=&gg[2]; #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 3094 "dwf.nw" } if ((m & 0x08) == 0) { k=3; f=&psi[ps[3]]; g=&gg[3]; #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 3097 "dwf.nw" } if ((m & 0x10) == 0) { k=4; f=&psi[ps[4]]; g=&gg[4]; #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 3100 "dwf.nw" } if ((m & 0x20) == 0) { k=5; f=&psi[ps[5]]; g=&gg[5]; #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 3103 "dwf.nw" } if ((m & 0x40) == 0) { k=6; f=&psi[ps[6]]; g=&gg[6]; #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 3106 "dwf.nw" } if ((m & 0x80) == 0) { k=7; f=&psi[ps[7]]; g=&gg[7]; #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + 
f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 3109 "dwf.nw" } } #line 3201 "dwf.nw" #line 3001 "dwf.nw" for (d = 0; d < 8; d++) { vHalfFermion * __restrict__ h = &hh[d]; vSU3 *u = &V[d]; g = (m & (1 << d))? &nb->rcv_buf[d][ps[d]]: &gg[d]; #line 3009 "dwf.nw" for (c = 0; c < 3; c++) { h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im; h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im +u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im; h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im; h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im; } #line 3006 "dwf.nw" } #line 3202 "dwf.nw" #line 3207 "dwf.nw" rs = &rx5[s]; for (c = 0; c < 3; c++) { k = 7; #line 258 "dwf.nw" qs->f[0][c].re = gg[k].f[0][c].re; qs->f[2][c].re = gg[k].f[0][c].re; qs->f[0][c].im = gg[k].f[0][c].im; qs->f[2][c].im = gg[k].f[0][c].im; qs->f[1][c].re = gg[k].f[1][c].re; qs->f[3][c].re = gg[k].f[1][c].re; qs->f[1][c].im = gg[k].f[1][c].im; qs->f[3][c].im = gg[k].f[1][c].im; #line 3210 "dwf.nw" k = 6; #line 271 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].im -= gg[k].f[1][c].im; #line 3211 "dwf.nw" k = 3; #line 176 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; 
qs->f[3][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im += gg[k].f[1][c].im; #line 3212 "dwf.nw" k = 2; #line 189 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im -= gg[k].f[1][c].im; #line 3213 "dwf.nw" k = 0; #line 148 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re -= gg[k].f[1][c].im; #line 3214 "dwf.nw" k = 1; #line 135 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re += gg[k].f[1][c].im; #line 3215 "dwf.nw" k = 4; #line 230 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re += gg[k].f[1][c].im; #line 3216 "dwf.nw" k = 5; #line 217 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re -= gg[k].f[1][c].im; #line 3217 "dwf.nw" } #line 3203 "dwf.nw" } #line 3186 "dwf.nw" #line 2311 "dwf.nw" vhfzero(&zV); fx = ab_LA; #line 2599 "dwf.nw" #if defined(qs) 
#define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2314 "dwf.nw" for (s = 0; s < S_4_1; s++, fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2318 "dwf.nw" } rs = &rx5[S_4_1]; QSETUP(S_4_1) vput_3(&fx, c0); #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2323 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2325 "dwf.nw" zn.re = qs->f[2][c].re; zn.im = qs->f[2][c].im; vput_3(&zn.re, zX[0][c].re); vput_3(&zn.im, zX[0][c].im); rs->f[2][c].re = zn.re; rs->f[2][c].im = zn.im; zn.re = qs->f[3][c].re; zn.im = qs->f[3][c].im; vput_3(&zn.re, zX[1][c].re); vput_3(&zn.im, zX[1][c].im); rs->f[3][c].re = zn.re; rs->f[3][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2427 "dwf.nw" for (s = S_4; s--;) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2497 "dwf.nw" zn.re = rs->f[2][c].re; zn.im = rs->f[2][c].im; z1.re = shift_down1(zn.re, xOut.f[0][c].re); z1.im = shift_down1(zn.im, xOut.f[0][c].im); z2.re = shift_down2(zn.re, xOut.f[0][c].re); z2.im = shift_down2(zn.im, xOut.f[0][c].im); z3.re = shift_down3(zn.re, xOut.f[0][c].re); z3.im = shift_down3(zn.im, xOut.f[0][c].im); rs->f[2][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[2][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; 
yOut.f[0][c].re = rs->f[2][c].re; yOut.f[0][c].im = rs->f[2][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2431 "dwf.nw" #line 2514 "dwf.nw" zn.re = rs->f[3][c].re; zn.im = rs->f[3][c].im; z1.re = shift_down1(zn.re, xOut.f[1][c].re); z1.im = shift_down1(zn.im, xOut.f[1][c].im); z2.re = shift_down2(zn.re, xOut.f[1][c].re); z2.im = shift_down2(zn.im, xOut.f[1][c].im); z3.re = shift_down3(zn.re, xOut.f[1][c].re); z3.im = shift_down3(zn.im, xOut.f[1][c].im); rs->f[3][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[3][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[3][c].re; yOut.f[1][c].im = rs->f[3][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2432 "dwf.nw" } } #line 2349 "dwf.nw" vhfzero(&zV); fx = ab_LB; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2352 "dwf.nw" for (s = S_4; --s; fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2356 "dwf.nw" } rs = &rx5[0]; QSETUP(0) vput_0(&fx, c0); #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2361 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2364 "dwf.nw" zn.re = qs->f[0][c].re; zn.im = qs->f[0][c].im; vput_0(&zn.re, zX[0][c].re); vput_0(&zn.im, zX[0][c].im); rs->f[0][c].re = zn.re; rs->f[0][c].im = 
zn.im; zn.re = qs->f[1][c].re; zn.im = qs->f[1][c].im; vput_0(&zn.re, zX[1][c].re); vput_0(&zn.im, zX[1][c].im); rs->f[1][c].re = zn.re; rs->f[1][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2437 "dwf.nw" for (s = 0; s < S_4; s++) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2531 "dwf.nw" zn.re = rs->f[0][c].re; zn.im = rs->f[0][c].im; z1.re = shift_up1(xOut.f[0][c].re, zn.re); z1.im = shift_up1(xOut.f[0][c].im, zn.im); z2.re = shift_up2(xOut.f[0][c].re, zn.re); z2.im = shift_up2(xOut.f[0][c].im, zn.im); z3.re = shift_up3(xOut.f[0][c].re, zn.re); z3.im = shift_up3(xOut.f[0][c].im, zn.im); rs->f[0][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[0][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[0][c].re; yOut.f[0][c].im = rs->f[0][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2441 "dwf.nw" #line 2548 "dwf.nw" zn.re = rs->f[1][c].re; zn.im = rs->f[1][c].im; z1.re = shift_up1(xOut.f[1][c].re, zn.re); z1.im = shift_up1(xOut.f[1][c].im, zn.im); z2.re = shift_up2(xOut.f[1][c].re, zn.re); z2.im = shift_up2(xOut.f[1][c].im, zn.im); z3.re = shift_up3(xOut.f[1][c].re, zn.re); z3.im = shift_up3(xOut.f[1][c].im, zn.im); rs->f[1][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[1][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[1][c].re; yOut.f[1][c].im = rs->f[1][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2442 "dwf.nw" } } #line 3187 "dwf.nw" } #line 2730 "dwf.nw" #line 2771 "dwf.nw" #undef qs #undef qx5 #line 2731 "dwf.nw" } #line 2738 "dwf.nw" static void compute_1Qxx1Qxy(vFermion *chi, double *norm, const vFermion *eta, const vFermion *psi, struct neighbor *nb) { #line 3332 "dwf.nw" int i, xyzt5, s, c; vFermion * __restrict__ rx5, * __restrict__ rs; #line 2746 "dwf.nw" #line 3337 "dwf.nw" 
int xyzt, k, d; const vFermion *f; vHalfFermion *g; vHalfFermion gg[8], hh[8]; vSU3 V[8]; int ps[8], p5[8]; #line 3345 "dwf.nw" const SU3 *Uup, *Udown; int c1, c2; #line 2747 "dwf.nw" #line 3359 "dwf.nw" vReal fx; vHalfFermion zV; vcomplex zn, z1, z2, z3; complex zX[2][3]; vHalfFermion xOut; vHalfFermion yOut; #line 2748 "dwf.nw" vReal vv; vReal nv = vmk1(0.0); #line 2767 "dwf.nw" #define qx5 rx5 #define qs rs #line 2752 "dwf.nw" #line 3413 "dwf.nw" QMP_start(nb->qmp_cr); #line 2753 "dwf.nw" #line 3435 "dwf.nw" if (sending) { int i; /* This is QMP_wait_vector(nb->qmp_sv, nb->Ns); */ for (i = sending->Ns; i--;) QMP_wait(sending->qmp_sv[i]); sending = 0; } #line 2754 "dwf.nw" #line 2782 "dwf.nw" { int k, i, s, c, *src; const vFermion *f; vHalfFermion *g; k = 0; #line 2815 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 2819 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2788 "dwf.nw" k = 1; #line 2825 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 2829 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2789 "dwf.nw" k = 2; #line 2835 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 
170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 2839 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2790 "dwf.nw" k = 3; #line 2845 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 2849 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2791 "dwf.nw" k = 4; #line 2855 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 2859 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2792 "dwf.nw" k = 5; #line 2865 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 2869 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2793 "dwf.nw" k = 6; #line 2875 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = 
nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 2879 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2794 "dwf.nw" k = 7; #line 2885 "dwf.nw" for (i = nb->snd_size[k], g = nb->snd_buf[k], src = nb->snd[k]; i--; src++) { for (s = S_4, f = &psi[*src]; s--; g++, f++) { for (c = 0; c < 3; c++) { #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 2889 "dwf.nw" } } } #line 3425 "dwf.nw" if (nb->qmp_smask & (1 << k)) { QMP_start(nb->qmp_sh[k]); sending = nb; } #line 2795 "dwf.nw" } #line 2755 "dwf.nw" #line 3222 "dwf.nw" for (i = 0; i < nb->inside_size; i++) { const vFermion *ex5, *es; xyzt = nb->inside[i]; xyzt5 = xyzt * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 3228 "dwf.nw" ex5 = &eta[xyzt5]; #line 3276 "dwf.nw" Uup = &U[nb->site[xyzt].Uup]; for (d = 0; d < 4; d++, Uup++) { Udown = &U[nb->site[xyzt].Udown[d]]; for (c1 = 0; c1 < 3; c1++) { for (c2 = 0; c2 < 3; c2++) { V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re); V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im); /* conjugate down-link */ V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re); V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im); } } } #line 3230 "dwf.nw" #line 2919 "dwf.nw" for (s = 0; s < S_4; s++) { #line 3292 "dwf.nw" for (d = 0; d < 8; d++) { ps[d] = p5[d] + s; } #line 2936 "dwf.nw" for (c = 0; c < 3; c++) { k=0; f=&psi[ps[0]]; g=&gg[0]; #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = 
f->f[1][c].im + f->f[2][c].re; #line 2938 "dwf.nw" k=1; f=&psi[ps[1]]; g=&gg[1]; #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 2939 "dwf.nw" k=2; f=&psi[ps[2]]; g=&gg[2]; #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 2940 "dwf.nw" k=3; f=&psi[ps[3]]; g=&gg[3]; #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 2941 "dwf.nw" k=4; f=&psi[ps[4]]; g=&gg[4]; #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 2942 "dwf.nw" k=5; f=&psi[ps[5]]; g=&gg[5]; #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 2943 "dwf.nw" k=6; f=&psi[ps[6]]; g=&gg[6]; #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 2944 "dwf.nw" k=7; f=&psi[ps[7]]; g=&gg[7]; #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 2945 "dwf.nw" } #line 2921 "dwf.nw" #line 2992 "dwf.nw" for (d = 0; d < 8; d++) { vHalfFermion * __restrict__ h = &hh[d]; vSU3 *u = &V[d]; g = &gg[d]; #line 3009 
"dwf.nw" for (c = 0; c < 3; c++) { h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im; h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im +u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im; h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im; h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im; } #line 2997 "dwf.nw" } #line 2922 "dwf.nw" #line 2978 "dwf.nw" rs = &rx5[s]; for (c = 0; c < 3; c++) { k = 6; #line 258 "dwf.nw" qs->f[0][c].re = gg[k].f[0][c].re; qs->f[2][c].re = gg[k].f[0][c].re; qs->f[0][c].im = gg[k].f[0][c].im; qs->f[2][c].im = gg[k].f[0][c].im; qs->f[1][c].re = gg[k].f[1][c].re; qs->f[3][c].re = gg[k].f[1][c].re; qs->f[1][c].im = gg[k].f[1][c].im; qs->f[3][c].im = gg[k].f[1][c].im; #line 2981 "dwf.nw" k = 7; #line 271 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].im -= gg[k].f[1][c].im; #line 2982 "dwf.nw" k = 2; #line 176 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im += gg[k].f[1][c].im; #line 2983 "dwf.nw" k = 3; #line 189 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re += gg[k].f[0][c].re; qs->f[0][c].im += 
gg[k].f[0][c].im; qs->f[3][c].im += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im -= gg[k].f[1][c].im; #line 2984 "dwf.nw" k = 0; #line 135 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re += gg[k].f[1][c].im; #line 2985 "dwf.nw" k = 1; #line 148 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re -= gg[k].f[1][c].im; #line 2986 "dwf.nw" k = 4; #line 217 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re -= gg[k].f[1][c].im; #line 2987 "dwf.nw" k = 5; #line 230 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re += gg[k].f[1][c].im; #line 2988 "dwf.nw" } #line 2923 "dwf.nw" } #line 3231 "dwf.nw" #line 2269 "dwf.nw" vhfzero(&zV); fx = ab_LA; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2272 "dwf.nw" for (s = 0; s < S_4_1; s++, fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) 
zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2276 "dwf.nw" } rs = &rx5[S_4_1]; QSETUP(S_4_1) vput_3(&fx, c0); #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2281 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2284 "dwf.nw" zn.re = qs->f[0][c].re; zn.im = qs->f[0][c].im; vput_3(&zn.re, zX[0][c].re); vput_3(&zn.im, zX[0][c].im); rs->f[0][c].re = zn.re; rs->f[0][c].im = zn.im; zn.re = qs->f[1][c].re; zn.im = qs->f[1][c].im; vput_3(&zn.re, zX[1][c].re); vput_3(&zn.im, zX[1][c].im); rs->f[1][c].re = zn.re; rs->f[1][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2417 "dwf.nw" for (s = S_4; s--;) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2463 "dwf.nw" zn.re = rs->f[0][c].re; zn.im = rs->f[0][c].im; z1.re = shift_down1(zn.re, xOut.f[0][c].re); z1.im = shift_down1(zn.im, xOut.f[0][c].im); z2.re = shift_down2(zn.re, xOut.f[0][c].re); z2.im = shift_down2(zn.im, xOut.f[0][c].im); z3.re = shift_down3(zn.re, xOut.f[0][c].re); z3.im = shift_down3(zn.im, xOut.f[0][c].im); rs->f[0][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[0][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[0][c].re; yOut.f[0][c].im = rs->f[0][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2421 "dwf.nw" #line 2480 "dwf.nw" zn.re = rs->f[1][c].re; zn.im = rs->f[1][c].im; z1.re = shift_down1(zn.re, xOut.f[1][c].re); z1.im = shift_down1(zn.im, xOut.f[1][c].im); z2.re = 
shift_down2(zn.re, xOut.f[1][c].re); z2.im = shift_down2(zn.im, xOut.f[1][c].im); z3.re = shift_down3(zn.re, xOut.f[1][c].re); z3.im = shift_down3(zn.im, xOut.f[1][c].im); rs->f[1][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[1][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[1][c].re; yOut.f[1][c].im = rs->f[1][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2422 "dwf.nw" } } #line 2377 "dwf.nw" vhfzero(&zV); fx = ab_LB; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2380 "dwf.nw" for (s = S_4; --s; fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2384 "dwf.nw" } rs = &rx5[0]; QSETUP(0) vput_0(&fx, c0); #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2389 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2391 "dwf.nw" zn.re = qs->f[2][c].re; zn.im = qs->f[2][c].im; vput_0(&zn.re, zX[0][c].re); vput_0(&zn.im, zX[0][c].im); rs->f[2][c].re = zn.re; rs->f[2][c].im = zn.im; zn.re = qs->f[3][c].re; zn.im = qs->f[3][c].im; vput_0(&zn.re, zX[1][c].re); vput_0(&zn.im, zX[1][c].im); rs->f[3][c].re = zn.re; rs->f[3][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2447 "dwf.nw" for (s = 0; s < S_4; 
s++) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2565 "dwf.nw" zn.re = rs->f[2][c].re; zn.im = rs->f[2][c].im; z1.re = shift_up1(xOut.f[0][c].re, zn.re); z1.im = shift_up1(xOut.f[0][c].im, zn.im); z2.re = shift_up2(xOut.f[0][c].re, zn.re); z2.im = shift_up2(xOut.f[0][c].im, zn.im); z3.re = shift_up3(xOut.f[0][c].re, zn.re); z3.im = shift_up3(xOut.f[0][c].im, zn.im); rs->f[2][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[2][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[2][c].re; yOut.f[0][c].im = rs->f[2][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2451 "dwf.nw" #line 2582 "dwf.nw" zn.re = rs->f[3][c].re; zn.im = rs->f[3][c].im; z1.re = shift_up1(xOut.f[1][c].re, zn.re); z1.im = shift_up1(xOut.f[1][c].im, zn.im); z2.re = shift_up2(xOut.f[1][c].re, zn.re); z2.im = shift_up2(xOut.f[1][c].im, zn.im); z3.re = shift_up3(xOut.f[1][c].re, zn.re); z3.im = shift_up3(xOut.f[1][c].im, zn.im); rs->f[3][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[3][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[3][c].re; yOut.f[1][c].im = rs->f[3][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2452 "dwf.nw" } } #line 3252 "dwf.nw" for (s = 0; s < S_4; s++) { rs = &rx5[s]; es = &ex5[s]; for (c = 0; c < 3; c++) { #line 3262 "dwf.nw" vv = es->f[0][c].re - rs->f[0][c].re; rs->f[0][c].re = vv; nv += vv * vv; vv = es->f[0][c].im - rs->f[0][c].im; rs->f[0][c].im = vv; nv += vv * vv; vv = es->f[1][c].re - rs->f[1][c].re; rs->f[1][c].re = vv; nv += vv * vv; vv = es->f[1][c].im - rs->f[1][c].im; rs->f[1][c].im = vv; nv += vv * vv; vv = es->f[2][c].re - rs->f[2][c].re; rs->f[2][c].re = vv; nv += vv * vv; vv = es->f[2][c].im - rs->f[2][c].im; rs->f[2][c].im = vv; nv += vv * vv; vv = es->f[3][c].re - rs->f[3][c].re; rs->f[3][c].re = vv; nv += vv * vv; vv = es->f[3][c].im - 
rs->f[3][c].im; rs->f[3][c].im = vv; nv += vv * vv; #line 3257 "dwf.nw" } } #line 3232 "dwf.nw" } #line 2756 "dwf.nw" #line 3417 "dwf.nw" QMP_wait(nb->qmp_cr); #line 2757 "dwf.nw" #line 3236 "dwf.nw" for (i = 0; i < nb->boundary_size; i++) { const vFermion *ex5, *es; int m = nb->boundary[i].mask; xyzt = nb->boundary[i].index; xyzt5 = xyzt * S_4; #line 3352 "dwf.nw" rx5 = &chi[xyzt5]; #line 3243 "dwf.nw" ex5 = &eta[xyzt5]; #line 3276 "dwf.nw" Uup = &U[nb->site[xyzt].Uup]; for (d = 0; d < 4; d++, Uup++) { Udown = &U[nb->site[xyzt].Udown[d]]; for (c1 = 0; c1 < 3; c1++) { for (c2 = 0; c2 < 3; c2++) { V[d*2+0].v[c1][c2].re = vmk1(Uup->v[c1][c2].re); V[d*2+0].v[c1][c2].im = vmk1(Uup->v[c1][c2].im); /* conjugate down-link */ V[d*2+1].v[c1][c2].re = vmk1( Udown->v[c2][c1].re); V[d*2+1].v[c1][c2].im = vmk1(-Udown->v[c2][c1].im); } } } #line 3245 "dwf.nw" #line 2927 "dwf.nw" for (s = 0; s < S_4; s++) { #line 3292 "dwf.nw" for (d = 0; d < 8; d++) { ps[d] = p5[d] + s; } #line 2950 "dwf.nw" for (c = 0; c < 3; c++) { if ((m & 0x01) == 0) { k=0; f=&psi[ps[0]]; g=&gg[0]; #line 129 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].im; g->f[0][c].im = f->f[0][c].im + f->f[3][c].re; g->f[1][c].re = f->f[1][c].re - f->f[2][c].im; g->f[1][c].im = f->f[1][c].im + f->f[2][c].re; #line 2953 "dwf.nw" } if ((m & 0x02) == 0) { k=1; f=&psi[ps[1]]; g=&gg[1]; #line 142 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].im; g->f[0][c].im = f->f[0][c].im - f->f[3][c].re; g->f[1][c].re = f->f[1][c].re + f->f[2][c].im; g->f[1][c].im = f->f[1][c].im - f->f[2][c].re; #line 2956 "dwf.nw" } if ((m & 0x04) == 0) { k=2; f=&psi[ps[2]]; g=&gg[2]; #line 170 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[3][c].re; g->f[0][c].im = f->f[0][c].im - f->f[3][c].im; g->f[1][c].re = f->f[1][c].re + f->f[2][c].re; g->f[1][c].im = f->f[1][c].im + f->f[2][c].im; #line 2959 "dwf.nw" } if ((m & 0x08) == 0) { k=3; f=&psi[ps[3]]; g=&gg[3]; #line 183 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[3][c].re; g->f[0][c].im = 
f->f[0][c].im + f->f[3][c].im; g->f[1][c].re = f->f[1][c].re - f->f[2][c].re; g->f[1][c].im = f->f[1][c].im - f->f[2][c].im; #line 2962 "dwf.nw" } if ((m & 0x10) == 0) { k=4; f=&psi[ps[4]]; g=&gg[4]; #line 211 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].im; g->f[0][c].im = f->f[0][c].im + f->f[2][c].re; g->f[1][c].re = f->f[1][c].re + f->f[3][c].im; g->f[1][c].im = f->f[1][c].im - f->f[3][c].re; #line 2965 "dwf.nw" } if ((m & 0x20) == 0) { k=5; f=&psi[ps[5]]; g=&gg[5]; #line 224 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].im; g->f[0][c].im = f->f[0][c].im - f->f[2][c].re; g->f[1][c].re = f->f[1][c].re - f->f[3][c].im; g->f[1][c].im = f->f[1][c].im + f->f[3][c].re; #line 2968 "dwf.nw" } if ((m & 0x40) == 0) { k=6; f=&psi[ps[6]]; g=&gg[6]; #line 252 "dwf.nw" g->f[0][c].re = f->f[0][c].re + f->f[2][c].re; g->f[0][c].im = f->f[0][c].im + f->f[2][c].im; g->f[1][c].re = f->f[1][c].re + f->f[3][c].re; g->f[1][c].im = f->f[1][c].im + f->f[3][c].im; #line 2971 "dwf.nw" } if ((m & 0x80) == 0) { k=7; f=&psi[ps[7]]; g=&gg[7]; #line 265 "dwf.nw" g->f[0][c].re = f->f[0][c].re - f->f[2][c].re; g->f[0][c].im = f->f[0][c].im - f->f[2][c].im; g->f[1][c].re = f->f[1][c].re - f->f[3][c].re; g->f[1][c].im = f->f[1][c].im - f->f[3][c].im; #line 2974 "dwf.nw" } } #line 2929 "dwf.nw" #line 3001 "dwf.nw" for (d = 0; d < 8; d++) { vHalfFermion * __restrict__ h = &hh[d]; vSU3 *u = &V[d]; g = (m & (1 << d))? 
&nb->rcv_buf[d][ps[d]]: &gg[d]; #line 3009 "dwf.nw" for (c = 0; c < 3; c++) { h->f[0][c].re=u->v[c][0].re*g->f[0][0].re-u->v[c][0].im*g->f[0][0].im +u->v[c][1].re*g->f[0][1].re-u->v[c][1].im*g->f[0][1].im +u->v[c][2].re*g->f[0][2].re-u->v[c][2].im*g->f[0][2].im; h->f[0][c].im=u->v[c][0].im*g->f[0][0].re+u->v[c][0].re*g->f[0][0].im +u->v[c][1].im*g->f[0][1].re+u->v[c][1].re*g->f[0][1].im +u->v[c][2].im*g->f[0][2].re+u->v[c][2].re*g->f[0][2].im; h->f[1][c].re=u->v[c][0].re*g->f[1][0].re-u->v[c][0].im*g->f[1][0].im +u->v[c][1].re*g->f[1][1].re-u->v[c][1].im*g->f[1][1].im +u->v[c][2].re*g->f[1][2].re-u->v[c][2].im*g->f[1][2].im; h->f[1][c].im=u->v[c][0].im*g->f[1][0].re+u->v[c][0].re*g->f[1][0].im +u->v[c][1].im*g->f[1][1].re+u->v[c][1].re*g->f[1][1].im +u->v[c][2].im*g->f[1][2].re+u->v[c][2].re*g->f[1][2].im; } #line 3006 "dwf.nw" } #line 2930 "dwf.nw" #line 2978 "dwf.nw" rs = &rx5[s]; for (c = 0; c < 3; c++) { k = 6; #line 258 "dwf.nw" qs->f[0][c].re = gg[k].f[0][c].re; qs->f[2][c].re = gg[k].f[0][c].re; qs->f[0][c].im = gg[k].f[0][c].im; qs->f[2][c].im = gg[k].f[0][c].im; qs->f[1][c].re = gg[k].f[1][c].re; qs->f[3][c].re = gg[k].f[1][c].re; qs->f[1][c].im = gg[k].f[1][c].im; qs->f[3][c].im = gg[k].f[1][c].im; #line 2981 "dwf.nw" k = 7; #line 271 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].im -= gg[k].f[1][c].im; #line 2982 "dwf.nw" k = 2; #line 176 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im += gg[k].f[1][c].im; #line 2983 "dwf.nw" k = 3; #line 189 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].re += 
gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].im += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].re -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].im -= gg[k].f[1][c].im; #line 2984 "dwf.nw" k = 0; #line 135 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re += gg[k].f[1][c].im; #line 2985 "dwf.nw" k = 1; #line 148 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[3][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[3][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[2][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[2][c].re -= gg[k].f[1][c].im; #line 2986 "dwf.nw" k = 4; #line 217 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im -= gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re += gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im += gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re -= gg[k].f[1][c].im; #line 2987 "dwf.nw" k = 5; #line 230 "dwf.nw" qs->f[0][c].re += gg[k].f[0][c].re; qs->f[2][c].im += gg[k].f[0][c].re; qs->f[0][c].im += gg[k].f[0][c].im; qs->f[2][c].re -= gg[k].f[0][c].im; qs->f[1][c].re += gg[k].f[1][c].re; qs->f[3][c].im -= gg[k].f[1][c].re; qs->f[1][c].im += gg[k].f[1][c].im; qs->f[3][c].re += gg[k].f[1][c].im; #line 2988 "dwf.nw" } #line 2931 "dwf.nw" } #line 3246 "dwf.nw" #line 2269 "dwf.nw" vhfzero(&zV); fx = ab_LA; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2272 "dwf.nw" for (s = 0; s < S_4_1; s++, fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += 
fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2276 "dwf.nw" } rs = &rx5[S_4_1]; QSETUP(S_4_1) vput_3(&fx, c0); #line 2300 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[0][c].re; Q2R(0,re) zV.f[0][c].im += fx * qs->f[0][c].im; Q2R(0,im) zV.f[1][c].re += fx * qs->f[1][c].re; Q2R(1,re) zV.f[1][c].im += fx * qs->f[1][c].im; Q2R(1,im) } #line 2281 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2284 "dwf.nw" zn.re = qs->f[0][c].re; zn.im = qs->f[0][c].im; vput_3(&zn.re, zX[0][c].re); vput_3(&zn.im, zX[0][c].im); rs->f[0][c].re = zn.re; rs->f[0][c].im = zn.im; zn.re = qs->f[1][c].re; zn.im = qs->f[1][c].im; vput_3(&zn.re, zX[1][c].re); vput_3(&zn.im, zX[1][c].im); rs->f[1][c].re = zn.re; rs->f[1][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2417 "dwf.nw" for (s = S_4; s--;) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2463 "dwf.nw" zn.re = rs->f[0][c].re; zn.im = rs->f[0][c].im; z1.re = shift_down1(zn.re, xOut.f[0][c].re); z1.im = shift_down1(zn.im, xOut.f[0][c].im); z2.re = shift_down2(zn.re, xOut.f[0][c].re); z2.im = shift_down2(zn.im, xOut.f[0][c].im); z3.re = shift_down3(zn.re, xOut.f[0][c].re); z3.im = shift_down3(zn.im, xOut.f[0][c].im); rs->f[0][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[0][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[0][c].re; yOut.f[0][c].im = rs->f[0][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2421 "dwf.nw" #line 2480 "dwf.nw" zn.re = rs->f[1][c].re; zn.im = rs->f[1][c].im; z1.re = shift_down1(zn.re, xOut.f[1][c].re); z1.im = shift_down1(zn.im, 
xOut.f[1][c].im); z2.re = shift_down2(zn.re, xOut.f[1][c].re); z2.im = shift_down2(zn.im, xOut.f[1][c].im); z3.re = shift_down3(zn.re, xOut.f[1][c].re); z3.im = shift_down3(zn.im, xOut.f[1][c].im); rs->f[1][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[1][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[1][c].re; yOut.f[1][c].im = rs->f[1][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2422 "dwf.nw" } } #line 2377 "dwf.nw" vhfzero(&zV); fx = ab_LB; #line 2599 "dwf.nw" #if defined(qs) #define QSETUP(s) #define Q2R(d,pt) #else #define QSETUP(s) qs = &qx5[s]; #define Q2R(d,pt) rs->f[d][c].pt = qs->f[d][c].pt; #endif #line 2380 "dwf.nw" for (s = S_4; --s; fx = fx * va4) { rs = &rx5[s]; QSETUP(s) #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2384 "dwf.nw" } rs = &rx5[0]; QSETUP(0) vput_0(&fx, c0); #line 2337 "dwf.nw" for (c = 0; c < 3; c++) { zV.f[0][c].re += fx * qs->f[2][c].re; Q2R(2,re) zV.f[0][c].im += fx * qs->f[2][c].im; Q2R(2,im) zV.f[1][c].re += fx * qs->f[3][c].re; Q2R(3,re) zV.f[1][c].im += fx * qs->f[3][c].im; Q2R(3,im) } #line 2389 "dwf.nw" for (c = 0; c < 3; c++) { #line 2404 "dwf.nw" zX[0][c].re = vsum(zV.f[0][c].re); zX[0][c].im = vsum(zV.f[0][c].im); zX[1][c].re = vsum(zV.f[1][c].re); zX[1][c].im = vsum(zV.f[1][c].im); #line 2391 "dwf.nw" zn.re = qs->f[2][c].re; zn.im = qs->f[2][c].im; vput_0(&zn.re, zX[0][c].re); vput_0(&zn.im, zX[0][c].im); rs->f[2][c].re = zn.re; rs->f[2][c].im = zn.im; zn.re = qs->f[3][c].re; zn.im = qs->f[3][c].im; vput_0(&zn.re, zX[1][c].re); vput_0(&zn.im, zX[1][c].im); rs->f[3][c].re = zn.re; rs->f[3][c].im = zn.im; } #line 2608 "dwf.nw" #undef QSETUP #undef Q2R #line 2457 "dwf.nw" vhfzero(&xOut); vhfzero(&yOut); #line 2447 
"dwf.nw" for (s = 0; s < S_4; s++) { rs = &rx5[s]; for (c = 0; c < 3; c++) { #line 2565 "dwf.nw" zn.re = rs->f[2][c].re; zn.im = rs->f[2][c].im; z1.re = shift_up1(xOut.f[0][c].re, zn.re); z1.im = shift_up1(xOut.f[0][c].im, zn.im); z2.re = shift_up2(xOut.f[0][c].re, zn.re); z2.im = shift_up2(xOut.f[0][c].im, zn.im); z3.re = shift_up3(xOut.f[0][c].re, zn.re); z3.im = shift_up3(xOut.f[0][c].im, zn.im); rs->f[2][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[0][c].re; rs->f[2][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[0][c].im; yOut.f[0][c].re = rs->f[2][c].re; yOut.f[0][c].im = rs->f[2][c].im; xOut.f[0][c].re = zn.re; xOut.f[0][c].im = zn.im; #line 2451 "dwf.nw" #line 2582 "dwf.nw" zn.re = rs->f[3][c].re; zn.im = rs->f[3][c].im; z1.re = shift_up1(xOut.f[1][c].re, zn.re); z1.im = shift_up1(xOut.f[1][c].im, zn.im); z2.re = shift_up2(xOut.f[1][c].re, zn.re); z2.im = shift_up2(xOut.f[1][c].im, zn.im); z3.re = shift_up3(xOut.f[1][c].re, zn.re); z3.im = shift_up3(xOut.f[1][c].im, zn.im); rs->f[3][c].re = va0*zn.re + va1*z1.re + va2*z2.re + va3*z3.re + va4*yOut.f[1][c].re; rs->f[3][c].im = va0*zn.im + va1*z1.im + va2*z2.im + va3*z3.im + va4*yOut.f[1][c].im; yOut.f[1][c].re = rs->f[3][c].re; yOut.f[1][c].im = rs->f[3][c].im; xOut.f[1][c].re = zn.re; xOut.f[1][c].im = zn.im; #line 2452 "dwf.nw" } } #line 3252 "dwf.nw" for (s = 0; s < S_4; s++) { rs = &rx5[s]; es = &ex5[s]; for (c = 0; c < 3; c++) { #line 3262 "dwf.nw" vv = es->f[0][c].re - rs->f[0][c].re; rs->f[0][c].re = vv; nv += vv * vv; vv = es->f[0][c].im - rs->f[0][c].im; rs->f[0][c].im = vv; nv += vv * vv; vv = es->f[1][c].re - rs->f[1][c].re; rs->f[1][c].re = vv; nv += vv * vv; vv = es->f[1][c].im - rs->f[1][c].im; rs->f[1][c].im = vv; nv += vv * vv; vv = es->f[2][c].re - rs->f[2][c].re; rs->f[2][c].re = vv; nv += vv * vv; vv = es->f[2][c].im - rs->f[2][c].im; rs->f[2][c].im = vv; nv += vv * vv; vv = es->f[3][c].re - rs->f[3][c].re; rs->f[3][c].re = vv; nv += vv * vv; vv = 
es->f[3][c].im - rs->f[3][c].im; rs->f[3][c].im = vv; nv += vv * vv; #line 3257 "dwf.nw" } } #line 3247 "dwf.nw" } #line 2758 "dwf.nw" *norm = vsum(nv); #line 3457 "dwf.nw" QMP_sum_double(norm); #line 2760 "dwf.nw" #line 2771 "dwf.nw" #undef qs #undef qx5 #line 2761 "dwf.nw" } #line 560 "dwf.nw" int SSE_DWF_init(const int lattice[DIM+1], SSE_DWF_FP_SIZE fp_size, void *(*allocator)(size_t size), void (*deallocator)(void *)) { if (inited_p) return 1; /* error: second init */ #line 606 "dwf.nw" if (fp_size != SSE_DWF_FLOAT) goto error; #line 570 "dwf.nw" #line 611 "dwf.nw" if (lattice[DIM] % Vs) goto error; tlattice[DIM] = lattice[DIM]; #line 618 "dwf.nw" { int i; for (i = 0; i < DIM; i++) { if (lattice[i] & 1) goto error; tlattice[i] = lattice[i]; } } #line 571 "dwf.nw" #line 924 "dwf.nw" { int i, dn; const QMP_u32_t *xn, *xc; if (!QMP_logical_topology_is_declared()) /* The user must have declared logical topology before */ goto error; dn = QMP_get_logical_number_of_dimensions(); if (dn > DIM) /* Too high dimension of the logical network */ goto error; xn = QMP_get_logical_dimensions(); xc = QMP_get_logical_coordinates(); for (i = 0; i < dn; i++) { network[i] = xn[i]; coord[i] = xc[i]; } for (; i < dn; i++) { network[i] = 1; coord[i] = 0; } } #line 572 "dwf.nw" #line 590 "dwf.nw" if (allocator) tmalloc = allocator; else tmalloc = malloc; if (deallocator) tfree = deallocator; else tfree = free; #line 573 "dwf.nw" #line 1158 "dwf.nw" if (init_tables()) { /* Something went wrong in the table construction */ goto error; } #line 574 "dwf.nw" #line 1855 "dwf.nw" Phi_o = allocate_odd_fermion(); if (Phi_o == 0) goto error; auxA_o = allocate_odd_fermion(); if (auxA_o == 0) goto error; auxB_o = allocate_odd_fermion(); if (auxB_o == 0) goto error; auxA_e = allocate_even_fermion(); if (auxA_e == 0) goto error; #line 1927 "dwf.nw" r_o = allocate_odd_fermion(); if (r_o == 0) goto error; p_o = allocate_odd_fermion(); if (p_o == 0) goto error; q_o = allocate_odd_fermion(); if (q_o 
== 0) goto error; #line 575 "dwf.nw" #line 1655 "dwf.nw" if (build_buffers(&even_odd)) goto error; if (build_buffers(&odd_even)) goto error; #line 576 "dwf.nw" inited_p = 1; return 0; #line 584 "dwf.nw" error: SSE_DWF_fini(); return 1; #line 580 "dwf.nw" } #line 634 "dwf.nw" void SSE_DWF_fini(void) { #line 1795 "dwf.nw" free_buffers(&even_odd); free_buffers(&odd_even); #line 638 "dwf.nw" #line 1861 "dwf.nw" if (auxA_e) free16(auxA_e); auxA_e = 0; if (auxB_o) free16(auxB_o); auxB_o = 0; if (auxA_o) free16(auxA_o); auxA_o = 0; if (Phi_o) free16(Phi_o); Phi_o = 0; #line 1932 "dwf.nw" if (r_o) free16(r_o); r_o = 0; if (p_o) free16(p_o); p_o = 0; if (q_o) free16(q_o); q_o = 0; #line 639 "dwf.nw" #line 1546 "dwf.nw" { int i; if (neighbor.site) { tfree(neighbor.site); neighbor.site = 0; } if (neighbor.inside) { tfree(neighbor.inside); neighbor.inside = 0; } if (neighbor.boundary) { tfree(neighbor.boundary); neighbor.boundary = 0; } for (i = 2 * DIM; i--;) { if (neighbor.snd[i] == 0) continue; tfree(neighbor.snd[i]); neighbor.snd[i] = 0; } } #line 640 "dwf.nw" inited_p = 0; } #line 656 "dwf.nw" SSE_DWF_Fermion * SSE_DWF_allocate_fermion(void) { SSE_DWF_Fermion *ptr; if (!inited_p) return 0; ptr = tmalloc(sizeof (*ptr)); if (ptr == 0) return 0; ptr->even = allocate_even_fermion(); if (ptr->even == 0) goto error1; ptr->odd = allocate_odd_fermion(); if (ptr->odd == 0) goto error2; return ptr; error2: free16(ptr->even); error1: tfree(ptr); return 0; } #line 689 "dwf.nw" SSE_DWF_Fermion * SSE_DWF_load_fermion(const void *OuterFermion, void *env, SSE_DWF_fermion_reader reader) { SSE_DWF_Fermion *ptr = SSE_DWF_allocate_fermion(); /* Handle both lack of memory and missing initialization */ if (ptr == 0) return 0; #line 1059 "dwf.nw" { int x[DIM+1], i; #line 985 "dwf.nw" for (i = 0; i < DIM; i++) x[i] = bounds.lo[i]; for (i = 0; i < DIM;) { #line 1063 "dwf.nw" #line 1070 "dwf.nw" { int p = parity(x); int p1 = to_HFlinear(x, &bounds, -1, 0); /* magic: p is taken care of! 
*/ vFermion *f = p? &ptr->odd[p1].f: &ptr->even[p1].f; for (x[DIM] = 0; x[DIM] < tlattice[DIM]; x[DIM] += Vs, f++) { int d; for (d = 0; d < Fd; d++) { int c; for (c = 0; c < Nc; c++) { f->f[d][c].re = import_vector(OuterFermion, env, reader, x, c, d, 0); f->f[d][c].im = import_vector(OuterFermion, env, reader, x, c, d, 1); } } } } #line 1064 "dwf.nw" #line 996 "dwf.nw" for (i = 0; i < DIM; i++) { #line 1019 "dwf.nw" if (++x[i] == bounds.hi[i]) x[i] = bounds.lo[i]; else break; #line 998 "dwf.nw" } } #line 1065 "dwf.nw" } #line 702 "dwf.nw" return ptr; } #line 708 "dwf.nw" void SSE_DWF_save_fermion(void *OuterFermion, void *env, SSE_DWF_fermion_writer writer, SSE_DWF_Fermion *CGfermion) { if (!inited_p) return; #line 1109 "dwf.nw" { int x[DIM+1], i; #line 985 "dwf.nw" for (i = 0; i < DIM; i++) x[i] = bounds.lo[i]; for (i = 0; i < DIM;) { #line 1113 "dwf.nw" #line 1118 "dwf.nw" { int p = parity(x); int p1 = to_HFlinear(x, &bounds, -1, 0); /* magic: p is taken care of! */ vFermion *f = p? &CGfermion->odd[p1].f: &CGfermion->even[p1].f; for (x[DIM] = 0; x[DIM] < tlattice[DIM]; x[DIM] += Vs, f++) { int d; for (d = 0; d < Fd; d++) { int c; for (c = 0; c < Nc; c++) { save_vector(OuterFermion, env, writer, x, c, d, 0, f->f[d][c].re); save_vector(OuterFermion, env, writer, x, c, d, 1, f->f[d][c].im); } } } } #line 1114 "dwf.nw" #line 996 "dwf.nw" for (i = 0; i < DIM; i++) { #line 1019 "dwf.nw" if (++x[i] == bounds.hi[i]) x[i] = bounds.lo[i]; else break; #line 998 "dwf.nw" } } #line 1115 "dwf.nw" } #line 718 "dwf.nw" } #line 724 "dwf.nw" void SSE_DWF_delete_fermion(SSE_DWF_Fermion *ptr) { if (!inited_p) return; free16(ptr->even); free16(ptr->odd); tfree(ptr); } #line 740 "dwf.nw" SSE_DWF_Gauge * SSE_DWF_load_gauge(const void *OuterGauge_U, const void *OuterGauge_V, void *env, SSE_DWF_gauge_reader reader) { SSE_DWF_Gauge *g; if (!inited_p) return 0; g = allocate_gauge_field(); if (g == 0) return 0; #line 960 "dwf.nw" { int x[DIM], i, d, a, b, p1; #line 985 "dwf.nw" for (i = 0; 
i < DIM; i++) x[i] = bounds.lo[i]; for (i = 0; i < DIM;) { #line 964 "dwf.nw" #line 973 "dwf.nw" p1 = to_Ulinear(x, &bounds, -1); for (d = 0; d < DIM; d++) { for (a = 0; a < Nc; a++) { for (b = 0; b < Nc; b++) { g[p1 + d].v[a][b].re = reader(OuterGauge_U, env, x, d, a, b, 0); g[p1 + d].v[a][b].im = reader(OuterGauge_U, env, x, d, a, b, 1); } } } #line 965 "dwf.nw" #line 996 "dwf.nw" for (i = 0; i < DIM; i++) { #line 1019 "dwf.nw" if (++x[i] == bounds.hi[i]) x[i] = bounds.lo[i]; else break; #line 998 "dwf.nw" } } #line 967 "dwf.nw" for (d = 0; d < DIM; d++) #line 1033 "dwf.nw" { if (network[d] == 1) continue; #line 985 "dwf.nw" for (i = 0; i < DIM; i++) x[i] = bounds.lo[i]; for (i = 0; i < DIM;) { #line 1038 "dwf.nw" #line 1044 "dwf.nw" x[d] = bounds.lo[d] - 1; p1 = to_Ulinear(x, &bounds, d); x[d] = bounds.lo[d]; for (a = 0; a < Nc; a++) { for (b = 0; b < Nc; b++) { g[p1].v[a][b].re = reader(OuterGauge_V, env, x, d, a, b, 0); g[p1].v[a][b].im = reader(OuterGauge_V, env, x, d, a, b, 1); } } #line 1039 "dwf.nw" #line 1010 "dwf.nw" for (i = 0; i < DIM; i++) { if (i == d) continue; #line 1019 "dwf.nw" if (++x[i] == bounds.hi[i]) x[i] = bounds.lo[i]; else break; #line 1014 "dwf.nw" } } #line 1040 "dwf.nw" } #line 969 "dwf.nw" } #line 756 "dwf.nw" return g; } #line 770 "dwf.nw" void SSE_DWF_delete_gauge(SSE_DWF_Gauge *ptr) { if (!inited_p) return; free16(ptr); } #line 783 "dwf.nw" int SSE_DWF_cg_solver(SSE_DWF_Fermion *psi, /* result */ double *out_eps, int *out_iter, const SSE_DWF_Gauge *gauge, double m0, double M, const SSE_DWF_Fermion *x0, /* guess */ const SSE_DWF_Fermion *eta, /* rhs */ double eps, int max_iter) { int status; if (!inited_p) return 1; U = (SU3 *)gauge; #line 3381 "dwf.nw" { double a = M; double b = 2.; double c = -2*m0; vReal ab = vmk1(-b/a); c0 = 1./(1+c/b*pow(b/a, S_4*4)); ab_LA = vmk4(c*c0/a,-b*c*c0/(a*a), b*b*c*c0/(a*a*a), -b*b*b*c*c0/(a*a*a*a)); ab_LB = vmk4(-b*b*b*c*c0/(a*a*a*a), b*b*c*c0/(a*a*a), -b*c*c0/(a*a), c*c0/a); va0 = vmk1(1/a); va1 = 
va0 * ab; va2 = va1 * ab; va3 = va2 * ab; va4 = va3 * ab; } #line 800 "dwf.nw" #line 1844 "dwf.nw" compute_Qee1(auxA_e, eta->even); compute_Qoe(auxB_o, auxA_e); compute_sum_o(auxA_o, eta->odd, -1, auxB_o); compute_Qoo1(auxB_o, auxA_o); compute_Mx(Phi_o, auxB_o); #line 801 "dwf.nw" #line 1872 "dwf.nw" status = cg(psi->odd, Phi_o, x0->odd, eps, max_iter, out_eps, out_iter); #line 802 "dwf.nw" #line 1940 "dwf.nw" compute_Qeo(auxA_e, psi->odd); compute_sum_e(auxB_e, eta->even, -1, auxA_e); compute_Qee1(psi->even, auxB_e); #line 803 "dwf.nw" return status; }