//go:build amd64
// +build amd64

package common

import (
	"golang.org/x/sys/cpu"
)

// ZetasAVX2 contains all ζ used in the NTT (like the Zetas array), but also
// the values int16(zeta * 62209) for each zeta, which are used in
// Montgomery reduction. There is some duplication and reordering as
// compared to Zetas to make it more convenient for use with AVX2.
// (An illustrative sketch of this reduction appears at the end of this file.)
var ZetasAVX2 = [...]int16{
	// level 1: int16(Zetas[1]*62209) and Zetas[1]
	31499, 2571,

	// level 2
	//
	// int16(Zetas[2]*62209), Zetas[2], int16(Zetas[3]*62209), Zetas[3]
	14746, 2970, 788, 1812,

	// level 3, like level 2.
	13525, 1493, -12402, 1422,
	28191, 287, -16694, 202,

	0, 0, // padding

	// layer 4. offset: 1*16
	//
	// The precomputed multiplications and zetas are grouped by 16 at a
	// time as used in the set of butterflies, etc.
	-20906, -20906, -20906, -20906, -20906, -20906, -20906, -20906,
	27758, 27758, 27758, 27758, 27758, 27758, 27758, 27758,
	3158, 3158, 3158, 3158, 3158, 3158, 3158, 3158,
	622, 622, 622, 622, 622, 622, 622, 622,
	-3799, -3799, -3799, -3799, -3799, -3799, -3799, -3799,
	-15690, -15690, -15690, -15690, -15690, -15690, -15690, -15690,
	1577, 1577, 1577, 1577, 1577, 1577, 1577, 1577,
	182, 182, 182, 182, 182, 182, 182, 182,
	10690, 10690, 10690, 10690, 10690, 10690, 10690, 10690,
	1359, 1359, 1359, 1359, 1359, 1359, 1359, 1359,
	962, 962, 962, 962, 962, 962, 962, 962,
	2127, 2127, 2127, 2127, 2127, 2127, 2127, 2127,
	-11201, -11201, -11201, -11201, -11201, -11201, -11201, -11201,
	31164, 31164, 31164, 31164, 31164, 31164, 31164, 31164,
	1855, 1855, 1855, 1855, 1855, 1855, 1855, 1855,
	1468, 1468, 1468, 1468, 1468, 1468, 1468, 1468,

	// layer 5. offset: 9*16
	-5827, -5827, -5827, -5827, 17364, 17364, 17364, 17364,
	-26360, -26360, -26360, -26360, -29057, -29057, -29057, -29057,
	573, 573, 573, 573, 2004, 2004, 2004, 2004,
	264, 264, 264, 264, 383, 383, 383, 383,
	5572, 5572, 5572, 5572, -1102, -1102, -1102, -1102,
	21439, 21439, 21439, 21439, -26241, -26241, -26241, -26241,
	2500, 2500, 2500, 2500, 1458, 1458, 1458, 1458,
	1727, 1727, 1727, 1727, 3199, 3199, 3199, 3199,
	-28072, -28072, -28072, -28072, 24313, 24313, 24313, 24313,
	-10532, -10532, -10532, -10532, 8800, 8800, 8800, 8800,
	2648, 2648, 2648, 2648, 1017, 1017, 1017, 1017,
	732, 732, 732, 732, 608, 608, 608, 608,
	18427, 18427, 18427, 18427, 8859, 8859, 8859, 8859,
	26676, 26676, 26676, 26676, -16162, -16162, -16162, -16162,
	1787, 1787, 1787, 1787, 411, 411, 411, 411,
	3124, 3124, 3124, 3124, 1758, 1758, 1758, 1758,

	// layer 6. offset: 17*16
	-5689, -5689, -6516, -6516, 1497, 1497, 30967, 30967,
	-23564, -23564, 20179, 20179, 20711, 20711, 25081, 25081,
	1223, 1223, 652, 652, 2777, 2777, 1015, 1015,
	2036, 2036, 1491, 1491, 3047, 3047, 1785, 1785,
	-12796, -12796, 26617, 26617, 16065, 16065, -12441, -12441,
	9135, 9135, -649, -649, -25986, -25986, 27837, 27837,
	516, 516, 3321, 3321, 3009, 3009, 2663, 2663,
	1711, 1711, 2167, 2167, 126, 126, 1469, 1469,
	19884, 19884, -28249, -28249, -15886, -15886, -8898, -8898,
	-28309, -28309, 9076, 9076, -30198, -30198, 18250, 18250,
	2476, 2476, 3239, 3239, 3058, 3058, 830, 830,
	107, 107, 1908, 1908, 3082, 3082, 2378, 2378,
	13427, 13427, 14017, 14017, -29155, -29155, -12756, -12756,
	16832, 16832, 4312, 4312, -24155, -24155, -17914, -17914,
	2931, 2931, 961, 961, 1821, 1821, 2604, 2604,
	448, 448, 2264, 2264, 677, 677, 2054, 2054,

	// layer 7. offset: 25*16
	-334, 11182, -11477, 13387, -32226, -14233, 20494, -21655,
	-27738, 13131, 945, -4586, -14882, 23093, 6182, 5493,
	2226, 430, 555, 843, 2078, 871, 1550, 105,
	422, 587, 177, 3094, 3038, 2869, 1574, 1653,
	32011, -32502, 10631, 30318, 29176, -18741, -28761, 12639,
	-18485, 20100, 17561, 18525, -14430, 19529, -5275, -12618,
	3083, 778, 1159, 3182, 2552, 1483, 2727, 1119,
	1739, 644, 2457, 349, 418, 329, 3173, 3254,
	-31183, 20297, 25435, 2146, -7382, 15356, 24392, -32384,
	-20926, -6279, 10946, -14902, 24215, -11044, 16990, 14470,
	817, 1097, 603, 610, 1322, 2044, 1864, 384,
	2114, 3193, 1218, 1994, 2455, 220, 2142, 1670,
	10336, -21497, -7933, -20198, -22501, 23211, 10907, -17442,
	31637, -23859, 28644, -20257, 23998, 7757, -17422, 23132,
	2144, 1799, 2051, 794, 1819, 2475, 2459, 478,
	3221, 3021, 996, 991, 958, 1869, 1522, 1628,

	// layer 1 inverse
	23132, -17422, 7757, 23998, -20257, 28644, -23859, 31637,
	-17442, 10907, 23211, -22501, -20198, -7933, -21497, 10336,
	1628, 1522, 1869, 958, 991, 996, 3021, 3221,
	478, 2459, 2475, 1819, 794, 2051, 1799, 2144,
	14470, 16990, -11044, 24215, -14902, 10946, -6279, -20926,
	-32384, 24392, 15356, -7382, 2146, 25435, 20297, -31183,
	1670, 2142, 220, 2455, 1994, 1218, 3193, 2114,
	384, 1864, 2044, 1322, 610, 603, 1097, 817,
	-12618, -5275, 19529, -14430, 18525, 17561, 20100, -18485,
	12639, -28761, -18741, 29176, 30318, 10631, -32502, 32011,
	3254, 3173, 329, 418, 349, 2457, 644, 1739,
	1119, 2727, 1483, 2552, 3182, 1159, 778, 3083,
	5493, 6182, 23093, -14882, -4586, 945, 13131, -27738,
	-21655, 20494, -14233, -32226, 13387, -11477, 11182, -334,
	1653, 1574, 2869, 3038, 3094, 177, 587, 422,
	105, 1550, 871, 2078, 843, 555, 430, 2226,

	// layer 2 inverse
	-17914, -17914, -24155, -24155, 4312, 4312, 16832, 16832,
	-12756, -12756, -29155, -29155, 14017, 14017, 13427, 13427,
	2054, 2054, 677, 677, 2264, 2264, 448, 448,
	2604, 2604, 1821, 1821, 961, 961, 2931, 2931,
	18250, 18250, -30198, -30198, 9076, 9076, -28309, -28309,
	-8898, -8898, -15886, -15886, -28249, -28249, 19884, 19884,
	2378, 2378, 3082, 3082, 1908, 1908, 107, 107,
	830, 830, 3058, 3058, 3239, 3239, 2476, 2476,
	27837, 27837, -25986, -25986, -649, -649, 9135, 9135,
	-12441, -12441, 16065, 16065, 26617, 26617, -12796, -12796,
	1469, 1469, 126, 126, 2167, 2167, 1711, 1711,
	2663, 2663, 3009, 3009, 3321, 3321, 516, 516,
	25081, 25081, 20711, 20711, 20179, 20179, -23564, -23564,
	30967, 30967, 1497, 1497, -6516, -6516, -5689, -5689,
	1785, 1785, 3047, 3047, 1491, 1491, 2036, 2036,
	1015, 1015, 2777, 2777, 652, 652, 1223, 1223,

	// layer 3 inverse
	-16162, -16162, -16162, -16162, 26676, 26676, 26676, 26676,
	8859, 8859, 8859, 8859, 18427, 18427, 18427, 18427,
	1758, 1758, 1758, 1758, 3124, 3124, 3124, 3124,
	411, 411, 411, 411, 1787, 1787, 1787, 1787,
	8800, 8800, 8800, 8800, -10532, -10532, -10532, -10532,
	24313, 24313, 24313, 24313, -28072, -28072, -28072, -28072,
	608, 608, 608, 608, 732, 732, 732, 732,
	1017, 1017, 1017, 1017, 2648, 2648, 2648, 2648,
	-26241, -26241, -26241, -26241, 21439, 21439, 21439, 21439,
	-1102, -1102, -1102, -1102, 5572, 5572, 5572, 5572,
	3199, 3199, 3199, 3199, 1727, 1727, 1727, 1727,
	1458, 1458, 1458, 1458, 2500, 2500, 2500, 2500,
	-29057, -29057, -29057, -29057, -26360, -26360, -26360, -26360,
	17364, 17364, 17364, 17364, -5827, -5827, -5827, -5827,
	383, 383, 383, 383, 264, 264, 264, 264,
	2004, 2004, 2004, 2004, 573, 573, 573, 573,

	// layer 4 inverse
	31164, 31164, 31164, 31164, 31164, 31164, 31164, 31164,
	-11201, -11201, -11201, -11201, -11201, -11201, -11201, -11201,
	1468, 1468, 1468, 1468, 1468, 1468, 1468, 1468,
	1855, 1855, 1855, 1855, 1855, 1855, 1855, 1855,
	1359, 1359, 1359, 1359, 1359, 1359, 1359, 1359,
	10690, 10690, 10690, 10690, 10690, 10690, 10690, 10690,
	2127, 2127, 2127, 2127, 2127, 2127, 2127, 2127,
	962, 962, 962, 962, 962, 962, 962, 962,
	-15690, -15690, -15690, -15690, -15690, -15690, -15690, -15690,
	-3799, -3799, -3799, -3799, -3799, -3799, -3799, -3799,
	182, 182, 182, 182, 182, 182, 182, 182,
	1577, 1577, 1577, 1577, 1577, 1577, 1577, 1577,
	27758, 27758, 27758, 27758, 27758, 27758, 27758, 27758,
	-20906, -20906, -20906, -20906, -20906, -20906, -20906, -20906,
	622, 622, 622, 622, 622, 622, 622, 622,
	3158, 3158, 3158, 3158, 3158, 3158, 3158, 3158,

	// layer 5 inverse
	-16694, 202, 28191, 287,
	-12402, 1422, 13525, 1493,

	// layer 6 inverse
	788, 1812, 14746, 2970,

	// layer 7 inverse
	31499, 2571,
}

// Sets p to a + b. Does not normalize coefficients.
func (p *Poly) Add(a, b *Poly) {
	if cpu.X86.HasAVX2 {
		addAVX2(
			(*[N]int16)(p),
			(*[N]int16)(a),
			(*[N]int16)(b),
		)
	} else {
		p.addGeneric(a, b)
	}
}

// Sets p to a - b. Does not normalize coefficients.
func (p *Poly) Sub(a, b *Poly) {
	if cpu.X86.HasAVX2 {
		subAVX2(
			(*[N]int16)(p),
			(*[N]int16)(a),
			(*[N]int16)(b),
		)
	} else {
		p.subGeneric(a, b)
	}
}

// Executes an in-place forward "NTT" on p.
//
// Assumes the coefficients are in absolute value ≤q. The resulting
// coefficients are in absolute value ≤7q. If the input is in Montgomery
// form, then the result is in Montgomery form and so (by linearity of the NTT)
// if the input is in regular form, then the result is also in regular form.
// The order of coefficients will be "tangled". These can be put back into
// their proper order by calling Detangle().
func (p *Poly) NTT() {
	if cpu.X86.HasAVX2 {
		nttAVX2((*[N]int16)(p))
	} else {
		p.nttGeneric()
	}
}

// Executes an in-place inverse "NTT" on p and multiplies by the Montgomery
// factor R.
//
// Requires coefficients to be in "tangled" order, see Tangle().
// Assumes the coefficients are in absolute value ≤q. The resulting
// coefficients are in absolute value ≤q. If the input is in Montgomery
// form, then the result is in Montgomery form and so (by linearity)
// if the input is in regular form, then the result is also in regular form.
func (p *Poly) InvNTT() {
	if cpu.X86.HasAVX2 {
		invNttAVX2((*[N]int16)(p))
	} else {
		p.invNTTGeneric()
	}
}

// Sets p to the "pointwise" multiplication of a and b.
//
// That is: InvNTT(p) = InvNTT(a) * InvNTT(b). Assumes a and b are in
// Montgomery form. Products between coefficients of a and b must be strictly
// bounded in absolute value by 2¹⁵q. p will be in Montgomery form and
// bounded in absolute value by 2q.
//
// Requires a and b to be in "tangled" order, see Tangle(). p will be in
// tangled order as well.
func (p *Poly) MulHat(a, b *Poly) {
	if cpu.X86.HasAVX2 {
		mulHatAVX2(
			(*[N]int16)(p),
			(*[N]int16)(a),
			(*[N]int16)(b),
		)
	} else {
		p.mulHatGeneric(a, b)
	}
}

// Puts p into the right form to be used with (among others) InvNTT().
func (p *Poly) Tangle() {
	if cpu.X86.HasAVX2 {
		tangleAVX2((*[N]int16)(p))
	}

	// When AVX2 is not available, we use the standard order.
}

// Puts p back into standard form.
func (p *Poly) Detangle() {
	if cpu.X86.HasAVX2 {
		detangleAVX2((*[N]int16)(p))
	}

	// When AVX2 is not available, we use the standard order.
}

// Almost normalizes coefficients.
//
// Ensures each coefficient is in {0, …, q}.
func (p *Poly) BarrettReduce() {
	if cpu.X86.HasAVX2 {
		barrettReduceAVX2((*[N]int16)(p))
	} else {
		p.barrettReduceGeneric()
	}
}
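// Illustrative sketch (not used by the package) of how the routines above are
// meant to compose when multiplying two polynomials a and b into p. The name
// nttMulSketch and the exact sequence of reductions are assumptions for the
// example, not an API of this package; the call order follows the contracts
// documented on NTT, MulHat, InvNTT, Tangle and Detangle. It assumes a and b
// start with coefficients bounded by q in absolute value, as NTT requires.
// The BarrettReduce calls bring coefficients back into {0, …, q}, so products
// of coefficients stay strictly below 2¹⁵q as MulHat requires; per the
// contracts above, the factor R multiplied in by InvNTT should cancel the R⁻¹
// introduced by the Montgomery multiplications inside MulHat.
func nttMulSketch(p, a, b *Poly) {
	a.Tangle() // reorder for the AVX2 kernels (no-op without AVX2)
	b.Tangle()
	a.NTT() // coefficients now bounded by 7q in absolute value
	b.NTT()
	a.BarrettReduce() // back to {0, …, q}
	b.BarrettReduce()
	p.MulHat(a, b) // "pointwise" product, still in tangled order
	p.InvNTT()
	p.Detangle()  // restore the standard coefficient order
	p.Normalize() // coefficients in {0, …, q-1}
}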
// Normalizes coefficients.
//
// Ensures each coefficient is in {0, …, q-1}.
func (p *Poly) Normalize() {
	if cpu.X86.HasAVX2 {
		normalizeAVX2((*[N]int16)(p))
	} else {
		p.normalizeGeneric()
	}
}
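// The constant 62209 that appears throughout ZetasAVX2 is q⁻¹ mod 2¹⁶ for
// q = 3329. The sketch below is only an illustration of signed Montgomery
// reduction for a 32-bit product x with |x| < q·2¹⁵; it is an assumption for
// exposition, not the routine used by this package or its assembly.
func montReduceSketch(x int32) int16 {
	const q = 3329
	// m ≡ x·q⁻¹ (mod 2¹⁶), taken as a signed 16-bit value.
	m := int16(x * 62209)
	// x - m·q is divisible by 2¹⁶ and the quotient is congruent to
	// x·2⁻¹⁶ modulo q, with absolute value roughly bounded by q.
	return int16((x - int32(m)*q) >> 16)
}

// Precomputing int16(zeta*62209) means the vectorized butterflies can obtain
// the value m for a product a·zeta with a single low-half multiply of a by the
// stored constant, rather than first forming the full 32-bit product, which is
// why the table interleaves both forms of every zeta.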