// Code generated by mkasm.go. DO NOT EDIT.

#include "go_asm.h"
#include "textflag.h"

// gcExpandersAVX512 is a read-only table of 68 pointer-sized (8-byte) slots,
// 0x220 bytes in total.
//
//   slot 0        nil
//   slots 1..26   addresses of the file-local expandAVX512_N routines for
//                 N = 1, 2, 3, 4, 6, 8, 10, ..., 32, 36, 40, ..., 64,
//                 in ascending order of N
//   slots 27..67  nil
//
// NOTE(review): the slot index is presumably a size-class number, and a nil
// slot presumably means "no AVX-512 expander available"; confirm against the
// Go code that indexes this table.
GLOBL ·gcExpandersAVX512(SB), RODATA, $0x220
DATA ·gcExpandersAVX512+0x00(SB)/8, $0
DATA ·gcExpandersAVX512+0x08(SB)/8, $expandAVX512_1<>(SB)
DATA ·gcExpandersAVX512+0x10(SB)/8, $expandAVX512_2<>(SB)
DATA ·gcExpandersAVX512+0x18(SB)/8, $expandAVX512_3<>(SB)
DATA ·gcExpandersAVX512+0x20(SB)/8, $expandAVX512_4<>(SB)
DATA ·gcExpandersAVX512+0x28(SB)/8, $expandAVX512_6<>(SB)
DATA ·gcExpandersAVX512+0x30(SB)/8, $expandAVX512_8<>(SB)
DATA ·gcExpandersAVX512+0x38(SB)/8, $expandAVX512_10<>(SB)
DATA ·gcExpandersAVX512+0x40(SB)/8, $expandAVX512_12<>(SB)
DATA ·gcExpandersAVX512+0x48(SB)/8, $expandAVX512_14<>(SB)
DATA ·gcExpandersAVX512+0x50(SB)/8, $expandAVX512_16<>(SB)
DATA ·gcExpandersAVX512+0x58(SB)/8, $expandAVX512_18<>(SB)
DATA ·gcExpandersAVX512+0x60(SB)/8, $expandAVX512_20<>(SB)
DATA ·gcExpandersAVX512+0x68(SB)/8, $expandAVX512_22<>(SB)
DATA ·gcExpandersAVX512+0x70(SB)/8, $expandAVX512_24<>(SB)
DATA ·gcExpandersAVX512+0x78(SB)/8, $expandAVX512_26<>(SB)
DATA ·gcExpandersAVX512+0x80(SB)/8, $expandAVX512_28<>(SB)
DATA ·gcExpandersAVX512+0x88(SB)/8, $expandAVX512_30<>(SB)
DATA ·gcExpandersAVX512+0x90(SB)/8, $expandAVX512_32<>(SB)
DATA ·gcExpandersAVX512+0x98(SB)/8, $expandAVX512_36<>(SB)
DATA ·gcExpandersAVX512+0xa0(SB)/8, $expandAVX512_40<>(SB)
DATA ·gcExpandersAVX512+0xa8(SB)/8, $expandAVX512_44<>(SB)
DATA ·gcExpandersAVX512+0xb0(SB)/8, $expandAVX512_48<>(SB)
DATA ·gcExpandersAVX512+0xb8(SB)/8, $expandAVX512_52<>(SB)
DATA ·gcExpandersAVX512+0xc0(SB)/8, $expandAVX512_56<>(SB)
DATA ·gcExpandersAVX512+0xc8(SB)/8, $expandAVX512_60<>(SB)
DATA ·gcExpandersAVX512+0xd0(SB)/8, $expandAVX512_64<>(SB)
DATA ·gcExpandersAVX512+0xd8(SB)/8, $0
DATA ·gcExpandersAVX512+0xe0(SB)/8, $0
DATA ·gcExpandersAVX512+0xe8(SB)/8, $0
DATA ·gcExpandersAVX512+0xf0(SB)/8, $0
DATA ·gcExpandersAVX512+0xf8(SB)/8, $0
DATA ·gcExpandersAVX512+0x100(SB)/8, $0
DATA ·gcExpandersAVX512+0x108(SB)/8, $0
DATA ·gcExpandersAVX512+0x110(SB)/8, $0
DATA ·gcExpandersAVX512+0x118(SB)/8, $0
DATA ·gcExpandersAVX512+0x120(SB)/8, $0
DATA ·gcExpandersAVX512+0x128(SB)/8, $0
DATA ·gcExpandersAVX512+0x130(SB)/8, $0
DATA ·gcExpandersAVX512+0x138(SB)/8, $0
DATA ·gcExpandersAVX512+0x140(SB)/8, $0
DATA ·gcExpandersAVX512+0x148(SB)/8, $0
DATA ·gcExpandersAVX512+0x150(SB)/8, $0
DATA ·gcExpandersAVX512+0x158(SB)/8, $0
DATA ·gcExpandersAVX512+0x160(SB)/8, $0
DATA ·gcExpandersAVX512+0x168(SB)/8, $0
DATA ·gcExpandersAVX512+0x170(SB)/8, $0
DATA ·gcExpandersAVX512+0x178(SB)/8, $0
DATA ·gcExpandersAVX512+0x180(SB)/8, $0
DATA ·gcExpandersAVX512+0x188(SB)/8, $0
DATA ·gcExpandersAVX512+0x190(SB)/8, $0
DATA ·gcExpandersAVX512+0x198(SB)/8, $0
DATA ·gcExpandersAVX512+0x1a0(SB)/8, $0
DATA ·gcExpandersAVX512+0x1a8(SB)/8, $0
DATA ·gcExpandersAVX512+0x1b0(SB)/8, $0
DATA ·gcExpandersAVX512+0x1b8(SB)/8, $0
DATA ·gcExpandersAVX512+0x1c0(SB)/8, $0
DATA ·gcExpandersAVX512+0x1c8(SB)/8, $0
DATA ·gcExpandersAVX512+0x1d0(SB)/8, $0
DATA ·gcExpandersAVX512+0x1d8(SB)/8, $0
DATA ·gcExpandersAVX512+0x1e0(SB)/8, $0
DATA ·gcExpandersAVX512+0x1e8(SB)/8, $0
DATA ·gcExpandersAVX512+0x1f0(SB)/8, $0
DATA ·gcExpandersAVX512+0x1f8(SB)/8, $0
DATA ·gcExpandersAVX512+0x200(SB)/8, $0
DATA ·gcExpandersAVX512+0x208(SB)/8, $0
DATA ·gcExpandersAVX512+0x210(SB)/8, $0
DATA ·gcExpandersAVX512+0x218(SB)/8, $0

// expandAVX512_1 loads the 128 bytes at (AX) unchanged: bytes 0..63 go to Z1
// and bytes 64..127 go to Z2. No shuffle or bit-matrix step is applied.
//
// The routine is NOSPLIT with a zero-size frame and no stack arguments
// ($0-0); it reads only AX and writes only Z1 and Z2.
// NOTE(review): the register contract (pointer in AX, result in Z1:Z2) is
// read off this body; confirm it against the code that calls through
// gcExpandersAVX512.
TEXT expandAVX512_1<>(SB), NOSPLIT, $0-0
	VMOVDQU64 (AX), Z1
	VMOVDQU64 64(AX), Z2
	RET

// Constant tables and code for expandAVX512_2.
//
// Each table is 64 bytes (one ZMM register) stored as eight little-endian
// quadwords.
// NOTE(review): judging by the name, the routine replicates every input bit
// 2 times; confirm against mkasm.go.

// Byte indices used by the first VPERMB to gather input bytes.
GLOBL expandAVX512_2_inShuf0<>(SB), RODATA, $0x40
DATA expandAVX512_2_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA expandAVX512_2_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA expandAVX512_2_inShuf0<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_2_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_2_inShuf0<>+0x20(SB)/8, $0x1716151413121110
DATA expandAVX512_2_inShuf0<>+0x28(SB)/8, $0x1716151413121110
DATA expandAVX512_2_inShuf0<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
DATA expandAVX512_2_inShuf0<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918

// One 8x8 bit matrix per quadword lane, consumed by VGF2P8AFFINEQB.
GLOBL expandAVX512_2_mat0<>(SB), RODATA, $0x40
DATA expandAVX512_2_mat0<>+0x00(SB)/8, $0x0101020204040808
DATA expandAVX512_2_mat0<>+0x08(SB)/8, $0x1010202040408080
DATA expandAVX512_2_mat0<>+0x10(SB)/8, $0x0101020204040808
DATA expandAVX512_2_mat0<>+0x18(SB)/8, $0x1010202040408080
DATA expandAVX512_2_mat0<>+0x20(SB)/8, $0x0101020204040808
DATA expandAVX512_2_mat0<>+0x28(SB)/8, $0x1010202040408080
DATA expandAVX512_2_mat0<>+0x30(SB)/8, $0x0101020204040808
DATA expandAVX512_2_mat0<>+0x38(SB)/8, $0x1010202040408080

// Byte indices used by the second VPERMB to gather input bytes.
GLOBL expandAVX512_2_inShuf1<>(SB), RODATA, $0x40
DATA expandAVX512_2_inShuf1<>+0x00(SB)/8, $0x2726252423222120
DATA expandAVX512_2_inShuf1<>+0x08(SB)/8, $0x2726252423222120
DATA expandAVX512_2_inShuf1<>+0x10(SB)/8, $0x2f2e2d2c2b2a2928
DATA expandAVX512_2_inShuf1<>+0x18(SB)/8, $0x2f2e2d2c2b2a2928
DATA expandAVX512_2_inShuf1<>+0x20(SB)/8, $0x3736353433323130
DATA expandAVX512_2_inShuf1<>+0x28(SB)/8, $0x3736353433323130
DATA expandAVX512_2_inShuf1<>+0x30(SB)/8, $0x3f3e3d3c3b3a3938
DATA expandAVX512_2_inShuf1<>+0x38(SB)/8, $0x3f3e3d3c3b3a3938

// Byte indices for the final VPERMB that orders each result register.
// NOTE(review): unlike the tables above, this symbol has no <> suffix and is
// therefore not file-local; confirm whether that is intentional.
GLOBL expandAVX512_2_outShufLo(SB), RODATA, $0x40
DATA expandAVX512_2_outShufLo+0x00(SB)/8, $0x0b030a0209010800
DATA expandAVX512_2_outShufLo+0x08(SB)/8, $0x0f070e060d050c04
DATA expandAVX512_2_outShufLo+0x10(SB)/8, $0x1b131a1219111810
DATA expandAVX512_2_outShufLo+0x18(SB)/8, $0x1f171e161d151c14
DATA expandAVX512_2_outShufLo+0x20(SB)/8, $0x2b232a2229212820
DATA expandAVX512_2_outShufLo+0x28(SB)/8, $0x2f272e262d252c24
DATA expandAVX512_2_outShufLo+0x30(SB)/8, $0x3b333a3239313830
DATA expandAVX512_2_outShufLo+0x38(SB)/8, $0x3f373e363d353c34

// expandAVX512_2 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
//
// Pipeline (run twice, once per inShuf table):
//   1. VPERMB gathers input bytes according to inShufN.
//   2. VGF2P8AFFINEQB multiplies every gathered byte by the 8x8 bit matrix
//      held in the matching quadword of mat0 (the immediate $0 adds nothing).
//   3. VPERMB reorders the bytes of each intermediate with outShufLo.
//
// Reads AX; overwrites Z0-Z4. NOSPLIT, no frame, no stack arguments.
// NOTE(review): AX-in / Z1:Z2-out is inferred from this body; confirm with
// the caller.
TEXT expandAVX512_2<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_2_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_2_mat0<>(SB), Z1
	VMOVDQU64 expandAVX512_2_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_2_outShufLo(SB), Z3
	VMOVDQU64 (AX), Z4                  // Z4 = 64 input bytes
	VPERMB Z4, Z0, Z0                   // Z0 = input bytes picked by inShuf0
	VGF2P8AFFINEQB $0, Z1, Z0, Z0       // Z0 = mat0 applied to each byte
	VPERMB Z4, Z2, Z2                   // Z2 = input bytes picked by inShuf1
	VGF2P8AFFINEQB $0, Z1, Z2, Z2       // last use of mat0 in Z1
	VPERMB Z0, Z3, Z1                   // Z1 = Z0 reordered by outShufLo
	VPERMB Z2, Z3, Z2                   // Z2 = Z2 reordered by outShufLo
	RET

// Constant tables and code for expandAVX512_3.
//
// Each table is 64 bytes (one ZMM register) stored as eight little-endian
// quadwords.
// NOTE(review): judging by the name, the routine replicates every input bit
// 3 times; confirm against mkasm.go.

// Byte indices for the first input gather (VPERMB).
GLOBL expandAVX512_3_inShuf0<>(SB), RODATA, $0x40
DATA expandAVX512_3_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA expandAVX512_3_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA expandAVX512_3_inShuf0<>+0x10(SB)/8, $0x0706050403020100
DATA expandAVX512_3_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_3_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_3_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_3_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
DATA expandAVX512_3_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff

// One 8x8 bit matrix per quadword lane, shared by all three affine steps.
// The last two lanes are all-zero matrices.
GLOBL expandAVX512_3_mat0<>(SB), RODATA, $0x40
DATA expandAVX512_3_mat0<>+0x00(SB)/8, $0x0101010202020404
DATA expandAVX512_3_mat0<>+0x08(SB)/8, $0x0408080810101020
DATA expandAVX512_3_mat0<>+0x10(SB)/8, $0x2020404040808080
DATA expandAVX512_3_mat0<>+0x18(SB)/8, $0x0101010202020404
DATA expandAVX512_3_mat0<>+0x20(SB)/8, $0x0408080810101020
DATA expandAVX512_3_mat0<>+0x28(SB)/8, $0x2020404040808080
DATA expandAVX512_3_mat0<>+0x30(SB)/8, $0x0000000000000000
DATA expandAVX512_3_mat0<>+0x38(SB)/8, $0x0000000000000000

// Byte indices for the second input gather (VPERMB).
GLOBL expandAVX512_3_inShuf1<>(SB), RODATA, $0x40
DATA expandAVX512_3_inShuf1<>+0x00(SB)/8, $0x1716151413121110
DATA expandAVX512_3_inShuf1<>+0x08(SB)/8, $0x1716151413121110
DATA expandAVX512_3_inShuf1<>+0x10(SB)/8, $0x1716151413121110
DATA expandAVX512_3_inShuf1<>+0x18(SB)/8, $0x1f1e1d1c1b1a1918
DATA expandAVX512_3_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
DATA expandAVX512_3_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
DATA expandAVX512_3_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
DATA expandAVX512_3_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff

// Byte indices for the third input gather (VPERMB).
GLOBL expandAVX512_3_inShuf2<>(SB), RODATA, $0x40
DATA expandAVX512_3_inShuf2<>+0x00(SB)/8, $0x2726252423222120
DATA expandAVX512_3_inShuf2<>+0x08(SB)/8, $0x2726252423222120
DATA expandAVX512_3_inShuf2<>+0x10(SB)/8, $0x2726252423222120
DATA expandAVX512_3_inShuf2<>+0x18(SB)/8, $0xffffffffff2a2928
DATA expandAVX512_3_inShuf2<>+0x20(SB)/8, $0xffffffffff2a2928
DATA expandAVX512_3_inShuf2<>+0x28(SB)/8, $0xffffffffffff2928
DATA expandAVX512_3_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
DATA expandAVX512_3_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff

// Two-source byte indices (VPERMI2B) producing the first result register.
// NOTE(review): this symbol and outShufHi have no <> suffix and are therefore
// not file-local, unlike the tables above; confirm whether that is
// intentional.
GLOBL expandAVX512_3_outShufLo(SB), RODATA, $0x40
DATA expandAVX512_3_outShufLo+0x00(SB)/8, $0x0a02110901100800
DATA expandAVX512_3_outShufLo+0x08(SB)/8, $0x05140c04130b0312
DATA expandAVX512_3_outShufLo+0x10(SB)/8, $0x170f07160e06150d
DATA expandAVX512_3_outShufLo+0x18(SB)/8, $0x221a292119282018
DATA expandAVX512_3_outShufLo+0x20(SB)/8, $0x1d2c241c2b231b2a
DATA expandAVX512_3_outShufLo+0x28(SB)/8, $0x2f271f2e261e2d25
DATA expandAVX512_3_outShufLo+0x30(SB)/8, $0x4a42514941504840
DATA expandAVX512_3_outShufLo+0x38(SB)/8, $0x45544c44534b4352

// Two-source byte indices (VPERMI2B) producing the second result register.
GLOBL expandAVX512_3_outShufHi(SB), RODATA, $0x40
DATA expandAVX512_3_outShufHi+0x00(SB)/8, $0x170f07160e06150d
DATA expandAVX512_3_outShufHi+0x08(SB)/8, $0x221a292119282018
DATA expandAVX512_3_outShufHi+0x10(SB)/8, $0x1d2c241c2b231b2a
DATA expandAVX512_3_outShufHi+0x18(SB)/8, $0x2f271f2e261e2d25
DATA expandAVX512_3_outShufHi+0x20(SB)/8, $0x4a42514941504840
DATA expandAVX512_3_outShufHi+0x28(SB)/8, $0x45544c44534b4352
DATA expandAVX512_3_outShufHi+0x30(SB)/8, $0x574f47564e46554d
DATA expandAVX512_3_outShufHi+0x38(SB)/8, $0x625a696159686058

// expandAVX512_3 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
//
// Three intermediates are built the same way: VPERMB gathers input bytes by
// inShufN, then VGF2P8AFFINEQB applies the per-lane 8x8 bit matrices of mat0
// (immediate $0 adds nothing). VPERMI2B then picks bytes from a pair of
// intermediates, overwriting its index register with the result:
//   Z1 = bytes of (Z0, Z4) selected by outShufLo
//   Z2 = bytes of (Z4, Z3) selected by outShufHi
//
// Reads AX; overwrites Z0-Z6. NOSPLIT, no frame, no stack arguments.
// NOTE(review): AX-in / Z1:Z2-out is inferred from this body; confirm with
// the caller.
TEXT expandAVX512_3<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_3_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_3_mat0<>(SB), Z3
	VMOVDQU64 expandAVX512_3_inShuf1<>(SB), Z4
	VMOVDQU64 expandAVX512_3_inShuf2<>(SB), Z5
	VMOVDQU64 expandAVX512_3_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_3_outShufHi(SB), Z2
	VMOVDQU64 (AX), Z6                  // Z6 = 64 input bytes
	VPERMB Z6, Z0, Z0                   // first intermediate
	VGF2P8AFFINEQB $0, Z3, Z0, Z0
	VPERMB Z6, Z4, Z4                   // second intermediate
	VGF2P8AFFINEQB $0, Z3, Z4, Z4
	VPERMB Z6, Z5, Z5                   // third intermediate
	VGF2P8AFFINEQB $0, Z3, Z5, Z3       // result replaces mat0 in Z3 (its last use)
	VPERMI2B Z4, Z0, Z1                 // Z1 = select from Z0 (first) and Z4 (second)
	VPERMI2B Z3, Z4, Z2                 // Z2 = select from Z4 (first) and Z3 (second)
	RET

// Constant tables and code for expandAVX512_4.
//
// Each table is 64 bytes (one ZMM register) stored as eight little-endian
// quadwords.
// NOTE(review): judging by the name, the routine replicates every input bit
// 4 times; confirm against mkasm.go.

// Byte indices for the first input gather (VPERMB).
GLOBL expandAVX512_4_inShuf0<>(SB), RODATA, $0x40
DATA expandAVX512_4_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA expandAVX512_4_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA expandAVX512_4_inShuf0<>+0x10(SB)/8, $0x0706050403020100
DATA expandAVX512_4_inShuf0<>+0x18(SB)/8, $0x0706050403020100
DATA expandAVX512_4_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_4_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_4_inShuf0<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_4_inShuf0<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908

// One 8x8 bit matrix per quadword lane, consumed by VGF2P8AFFINEQB.
GLOBL expandAVX512_4_mat0<>(SB), RODATA, $0x40
DATA expandAVX512_4_mat0<>+0x00(SB)/8, $0x0101010102020202
DATA expandAVX512_4_mat0<>+0x08(SB)/8, $0x0404040408080808
DATA expandAVX512_4_mat0<>+0x10(SB)/8, $0x1010101020202020
DATA expandAVX512_4_mat0<>+0x18(SB)/8, $0x4040404080808080
DATA expandAVX512_4_mat0<>+0x20(SB)/8, $0x0101010102020202
DATA expandAVX512_4_mat0<>+0x28(SB)/8, $0x0404040408080808
DATA expandAVX512_4_mat0<>+0x30(SB)/8, $0x1010101020202020
DATA expandAVX512_4_mat0<>+0x38(SB)/8, $0x4040404080808080

// Byte indices for the second input gather (VPERMB).
GLOBL expandAVX512_4_inShuf1<>(SB), RODATA, $0x40
DATA expandAVX512_4_inShuf1<>+0x00(SB)/8, $0x1716151413121110
DATA expandAVX512_4_inShuf1<>+0x08(SB)/8, $0x1716151413121110
DATA expandAVX512_4_inShuf1<>+0x10(SB)/8, $0x1716151413121110
DATA expandAVX512_4_inShuf1<>+0x18(SB)/8, $0x1716151413121110
DATA expandAVX512_4_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
DATA expandAVX512_4_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
DATA expandAVX512_4_inShuf1<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
DATA expandAVX512_4_inShuf1<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918

// Byte indices for the final VPERMB that orders each result register.
// NOTE(review): unlike the tables above, this symbol has no <> suffix and is
// therefore not file-local; confirm whether that is intentional.
GLOBL expandAVX512_4_outShufLo(SB), RODATA, $0x40
DATA expandAVX512_4_outShufLo+0x00(SB)/8, $0x1911090118100800
DATA expandAVX512_4_outShufLo+0x08(SB)/8, $0x1b130b031a120a02
DATA expandAVX512_4_outShufLo+0x10(SB)/8, $0x1d150d051c140c04
DATA expandAVX512_4_outShufLo+0x18(SB)/8, $0x1f170f071e160e06
DATA expandAVX512_4_outShufLo+0x20(SB)/8, $0x3931292138302820
DATA expandAVX512_4_outShufLo+0x28(SB)/8, $0x3b332b233a322a22
DATA expandAVX512_4_outShufLo+0x30(SB)/8, $0x3d352d253c342c24
DATA expandAVX512_4_outShufLo+0x38(SB)/8, $0x3f372f273e362e26

// expandAVX512_4 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
//
// Pipeline (run twice, once per inShuf table):
//   1. VPERMB gathers input bytes according to inShufN.
//   2. VGF2P8AFFINEQB multiplies every gathered byte by the 8x8 bit matrix
//      held in the matching quadword of mat0 (the immediate $0 adds nothing).
//   3. VPERMB reorders the bytes of each intermediate with outShufLo.
//
// Reads AX; overwrites Z0-Z4. NOSPLIT, no frame, no stack arguments.
// NOTE(review): AX-in / Z1:Z2-out is inferred from this body; confirm with
// the caller.
TEXT expandAVX512_4<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_4_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_4_mat0<>(SB), Z1
	VMOVDQU64 expandAVX512_4_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_4_outShufLo(SB), Z3
	VMOVDQU64 (AX), Z4                  // Z4 = 64 input bytes
	VPERMB Z4, Z0, Z0                   // Z0 = input bytes picked by inShuf0
	VGF2P8AFFINEQB $0, Z1, Z0, Z0       // Z0 = mat0 applied to each byte
	VPERMB Z4, Z2, Z2                   // Z2 = input bytes picked by inShuf1
	VGF2P8AFFINEQB $0, Z1, Z2, Z2       // last use of mat0 in Z1
	VPERMB Z0, Z3, Z1                   // Z1 = Z0 reordered by outShufLo
	VPERMB Z2, Z3, Z2                   // Z2 = Z2 reordered by outShufLo
	RET

// Constant tables and code for expandAVX512_6.
//
// Each table is 64 bytes (one ZMM register) stored as eight little-endian
// quadwords.
// NOTE(review): judging by the name, the routine replicates every input bit
// 6 times; confirm against mkasm.go.

// Byte indices for the first input gather (VPERMB).
GLOBL expandAVX512_6_inShuf0<>(SB), RODATA, $0x40
DATA expandAVX512_6_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA expandAVX512_6_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA expandAVX512_6_inShuf0<>+0x10(SB)/8, $0x0706050403020100
DATA expandAVX512_6_inShuf0<>+0x18(SB)/8, $0x0706050403020100
DATA expandAVX512_6_inShuf0<>+0x20(SB)/8, $0x0706050403020100
DATA expandAVX512_6_inShuf0<>+0x28(SB)/8, $0x0706050403020100
DATA expandAVX512_6_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
DATA expandAVX512_6_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff

// One 8x8 bit matrix per quadword lane, shared by all three affine steps.
// The last two lanes are all-zero matrices.
GLOBL expandAVX512_6_mat0<>(SB), RODATA, $0x40
DATA expandAVX512_6_mat0<>+0x00(SB)/8, $0x0101010101010202
DATA expandAVX512_6_mat0<>+0x08(SB)/8, $0x0202020204040404
DATA expandAVX512_6_mat0<>+0x10(SB)/8, $0x0404080808080808
DATA expandAVX512_6_mat0<>+0x18(SB)/8, $0x1010101010102020
DATA expandAVX512_6_mat0<>+0x20(SB)/8, $0x2020202040404040
DATA expandAVX512_6_mat0<>+0x28(SB)/8, $0x4040808080808080
DATA expandAVX512_6_mat0<>+0x30(SB)/8, $0x0000000000000000
DATA expandAVX512_6_mat0<>+0x38(SB)/8, $0x0000000000000000

// Byte indices for the second input gather (VPERMB).
GLOBL expandAVX512_6_inShuf1<>(SB), RODATA, $0x40
DATA expandAVX512_6_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_6_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_6_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_6_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_6_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_6_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_6_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
DATA expandAVX512_6_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff

// Byte indices for the third input gather (VPERMB).
GLOBL expandAVX512_6_inShuf2<>(SB), RODATA, $0x40
DATA expandAVX512_6_inShuf2<>+0x00(SB)/8, $0xffff151413121110
DATA expandAVX512_6_inShuf2<>+0x08(SB)/8, $0xffff151413121110
DATA expandAVX512_6_inShuf2<>+0x10(SB)/8, $0xffffff1413121110
DATA expandAVX512_6_inShuf2<>+0x18(SB)/8, $0xffffff1413121110
DATA expandAVX512_6_inShuf2<>+0x20(SB)/8, $0xffffff1413121110
DATA expandAVX512_6_inShuf2<>+0x28(SB)/8, $0xffffff1413121110
DATA expandAVX512_6_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
DATA expandAVX512_6_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff

// Two-source byte indices (VPERMI2B) producing the first result register.
// NOTE(review): this symbol and outShufHi have no <> suffix and are therefore
// not file-local, unlike the tables above; confirm whether that is
// intentional.
GLOBL expandAVX512_6_outShufLo(SB), RODATA, $0x40
DATA expandAVX512_6_outShufLo+0x00(SB)/8, $0x0901282018100800
DATA expandAVX512_6_outShufLo+0x08(SB)/8, $0x1a120a0229211911
DATA expandAVX512_6_outShufLo+0x10(SB)/8, $0x2b231b130b032a22
DATA expandAVX512_6_outShufLo+0x18(SB)/8, $0x0d052c241c140c04
DATA expandAVX512_6_outShufLo+0x20(SB)/8, $0x1e160e062d251d15
DATA expandAVX512_6_outShufLo+0x28(SB)/8, $0x2f271f170f072e26
DATA expandAVX512_6_outShufLo+0x30(SB)/8, $0x4941686058504840
DATA expandAVX512_6_outShufLo+0x38(SB)/8, $0x5a524a4269615951

// Two-source byte indices (VPERMI2B) producing the second result register.
GLOBL expandAVX512_6_outShufHi(SB), RODATA, $0x40
DATA expandAVX512_6_outShufHi+0x00(SB)/8, $0x2b231b130b032a22
DATA expandAVX512_6_outShufHi+0x08(SB)/8, $0x0d052c241c140c04
DATA expandAVX512_6_outShufHi+0x10(SB)/8, $0x1e160e062d251d15
DATA expandAVX512_6_outShufHi+0x18(SB)/8, $0x2f271f170f072e26
DATA expandAVX512_6_outShufHi+0x20(SB)/8, $0x4941686058504840
DATA expandAVX512_6_outShufHi+0x28(SB)/8, $0x5a524a4269615951
DATA expandAVX512_6_outShufHi+0x30(SB)/8, $0x6b635b534b436a62
DATA expandAVX512_6_outShufHi+0x38(SB)/8, $0x4d456c645c544c44

// expandAVX512_6 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
//
// Three intermediates are built the same way: VPERMB gathers input bytes by
// inShufN, then VGF2P8AFFINEQB applies the per-lane 8x8 bit matrices of mat0
// (immediate $0 adds nothing). VPERMI2B then picks bytes from a pair of
// intermediates, overwriting its index register with the result:
//   Z1 = bytes of (Z0, Z4) selected by outShufLo
//   Z2 = bytes of (Z4, Z3) selected by outShufHi
//
// Reads AX; overwrites Z0-Z6. NOSPLIT, no frame, no stack arguments.
// NOTE(review): AX-in / Z1:Z2-out is inferred from this body; confirm with
// the caller.
TEXT expandAVX512_6<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_6_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_6_mat0<>(SB), Z3
	VMOVDQU64 expandAVX512_6_inShuf1<>(SB), Z4
	VMOVDQU64 expandAVX512_6_inShuf2<>(SB), Z5
	VMOVDQU64 expandAVX512_6_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_6_outShufHi(SB), Z2
	VMOVDQU64 (AX), Z6                  // Z6 = 64 input bytes
	VPERMB Z6, Z0, Z0                   // first intermediate
	VGF2P8AFFINEQB $0, Z3, Z0, Z0
	VPERMB Z6, Z4, Z4                   // second intermediate
	VGF2P8AFFINEQB $0, Z3, Z4, Z4
	VPERMB Z6, Z5, Z5                   // third intermediate
	VGF2P8AFFINEQB $0, Z3, Z5, Z3       // result replaces mat0 in Z3 (its last use)
	VPERMI2B Z4, Z0, Z1                 // Z1 = select from Z0 (first) and Z4 (second)
	VPERMI2B Z3, Z4, Z2                 // Z2 = select from Z4 (first) and Z3 (second)
	RET

// Constant tables and code for expandAVX512_8.
//
// Each table is 64 bytes (one ZMM register) stored as eight little-endian
// quadwords.
// NOTE(review): judging by the name, the routine replicates every input bit
// 8 times; confirm against mkasm.go.

// Byte indices for the first input gather (VPERMB): bytes 0..7, repeated in
// every quadword lane.
GLOBL expandAVX512_8_inShuf0<>(SB), RODATA, $0x40
DATA expandAVX512_8_inShuf0<>+0x00(SB)/8, $0x0706050403020100
DATA expandAVX512_8_inShuf0<>+0x08(SB)/8, $0x0706050403020100
DATA expandAVX512_8_inShuf0<>+0x10(SB)/8, $0x0706050403020100
DATA expandAVX512_8_inShuf0<>+0x18(SB)/8, $0x0706050403020100
DATA expandAVX512_8_inShuf0<>+0x20(SB)/8, $0x0706050403020100
DATA expandAVX512_8_inShuf0<>+0x28(SB)/8, $0x0706050403020100
DATA expandAVX512_8_inShuf0<>+0x30(SB)/8, $0x0706050403020100
DATA expandAVX512_8_inShuf0<>+0x38(SB)/8, $0x0706050403020100

// One 8x8 bit matrix per quadword lane, consumed by VGF2P8AFFINEQB.
GLOBL expandAVX512_8_mat0<>(SB), RODATA, $0x40
DATA expandAVX512_8_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA expandAVX512_8_mat0<>+0x08(SB)/8, $0x0202020202020202
DATA expandAVX512_8_mat0<>+0x10(SB)/8, $0x0404040404040404
DATA expandAVX512_8_mat0<>+0x18(SB)/8, $0x0808080808080808
DATA expandAVX512_8_mat0<>+0x20(SB)/8, $0x1010101010101010
DATA expandAVX512_8_mat0<>+0x28(SB)/8, $0x2020202020202020
DATA expandAVX512_8_mat0<>+0x30(SB)/8, $0x4040404040404040
DATA expandAVX512_8_mat0<>+0x38(SB)/8, $0x8080808080808080

// Byte indices for the second input gather (VPERMB): bytes 8..15, repeated in
// every quadword lane.
GLOBL expandAVX512_8_inShuf1<>(SB), RODATA, $0x40
DATA expandAVX512_8_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_8_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_8_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_8_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_8_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_8_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_8_inShuf1<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
DATA expandAVX512_8_inShuf1<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908

// Byte indices for the final VPERMB that orders each result register.
// NOTE(review): unlike the tables above, this symbol has no <> suffix and is
// therefore not file-local; confirm whether that is intentional.
GLOBL expandAVX512_8_outShufLo(SB), RODATA, $0x40
DATA expandAVX512_8_outShufLo+0x00(SB)/8, $0x3830282018100800
DATA expandAVX512_8_outShufLo+0x08(SB)/8, $0x3931292119110901
DATA expandAVX512_8_outShufLo+0x10(SB)/8, $0x3a322a221a120a02
DATA expandAVX512_8_outShufLo+0x18(SB)/8, $0x3b332b231b130b03
DATA expandAVX512_8_outShufLo+0x20(SB)/8, $0x3c342c241c140c04
DATA expandAVX512_8_outShufLo+0x28(SB)/8, $0x3d352d251d150d05
DATA expandAVX512_8_outShufLo+0x30(SB)/8, $0x3e362e261e160e06
DATA expandAVX512_8_outShufLo+0x38(SB)/8, $0x3f372f271f170f07

// expandAVX512_8 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
//
// Pipeline (run twice, once per inShuf table):
//   1. VPERMB gathers input bytes according to inShufN.
//   2. VGF2P8AFFINEQB multiplies every gathered byte by the 8x8 bit matrix
//      held in the matching quadword of mat0 (the immediate $0 adds nothing).
//   3. VPERMB reorders the bytes of each intermediate with outShufLo.
//
// Reads AX; overwrites Z0-Z4. NOSPLIT, no frame, no stack arguments.
// NOTE(review): AX-in / Z1:Z2-out is inferred from this body; confirm with
// the caller.
TEXT expandAVX512_8<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_8_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_8_mat0<>(SB), Z1
	VMOVDQU64 expandAVX512_8_inShuf1<>(SB), Z2
	VMOVDQU64 expandAVX512_8_outShufLo(SB), Z3
	VMOVDQU64 (AX), Z4                  // Z4 = 64 input bytes
	VPERMB Z4, Z0, Z0                   // Z0 = input bytes picked by inShuf0
	VGF2P8AFFINEQB $0, Z1, Z0, Z0       // Z0 = mat0 applied to each byte
	VPERMB Z4, Z2, Z2                   // Z2 = input bytes picked by inShuf1
	VGF2P8AFFINEQB $0, Z1, Z2, Z2       // last use of mat0 in Z1
	VPERMB Z0, Z3, Z1                   // Z1 = Z0 reordered by outShufLo
	VPERMB Z2, Z3, Z2                   // Z2 = Z2 reordered by outShufLo
	RET

// Constant tables and code for expandAVX512_10.
//
// Each table is 64 bytes (one ZMM register) stored as eight little-endian
// quadwords. Unlike the smaller expanders, each of the three gathers has its
// own matrix table (mat0, mat1, mat2).
// NOTE(review): judging by the name, the routine replicates every input bit
// 10 times; confirm against mkasm.go.

// Byte indices for the first input gather (VPERMB).
GLOBL expandAVX512_10_inShuf0<>(SB), RODATA, $0x40
DATA expandAVX512_10_inShuf0<>+0x00(SB)/8, $0xff06050403020100
DATA expandAVX512_10_inShuf0<>+0x08(SB)/8, $0xff06050403020100
DATA expandAVX512_10_inShuf0<>+0x10(SB)/8, $0xff06050403020100
DATA expandAVX512_10_inShuf0<>+0x18(SB)/8, $0xff06050403020100
DATA expandAVX512_10_inShuf0<>+0x20(SB)/8, $0xffff050403020100
DATA expandAVX512_10_inShuf0<>+0x28(SB)/8, $0xffff050403020100
DATA expandAVX512_10_inShuf0<>+0x30(SB)/8, $0xffff050403020100
DATA expandAVX512_10_inShuf0<>+0x38(SB)/8, $0xffff050403020100

// Per-lane 8x8 bit matrices applied to the first gather.
GLOBL expandAVX512_10_mat0<>(SB), RODATA, $0x40
DATA expandAVX512_10_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA expandAVX512_10_mat0<>+0x08(SB)/8, $0x0101020202020202
DATA expandAVX512_10_mat0<>+0x10(SB)/8, $0x0202020204040404
DATA expandAVX512_10_mat0<>+0x18(SB)/8, $0x0404040404040808
DATA expandAVX512_10_mat0<>+0x20(SB)/8, $0x0808080808080808
DATA expandAVX512_10_mat0<>+0x28(SB)/8, $0x1010101010101010
DATA expandAVX512_10_mat0<>+0x30(SB)/8, $0x1010202020202020
DATA expandAVX512_10_mat0<>+0x38(SB)/8, $0x2020202040404040

// Byte indices for the second input gather (VPERMB).
GLOBL expandAVX512_10_inShuf1<>(SB), RODATA, $0x40
DATA expandAVX512_10_inShuf1<>+0x00(SB)/8, $0xffff050403020100
DATA expandAVX512_10_inShuf1<>+0x08(SB)/8, $0xffff050403020100
DATA expandAVX512_10_inShuf1<>+0x10(SB)/8, $0xff0c0b0a09080706
DATA expandAVX512_10_inShuf1<>+0x18(SB)/8, $0xff0c0b0a09080706
DATA expandAVX512_10_inShuf1<>+0x20(SB)/8, $0xff0c0b0a09080706
DATA expandAVX512_10_inShuf1<>+0x28(SB)/8, $0xff0c0b0a09080706
DATA expandAVX512_10_inShuf1<>+0x30(SB)/8, $0xffff0b0a09080706
DATA expandAVX512_10_inShuf1<>+0x38(SB)/8, $0xffff0b0a09080706

// Per-lane 8x8 bit matrices applied to the second gather.
GLOBL expandAVX512_10_mat1<>(SB), RODATA, $0x40
DATA expandAVX512_10_mat1<>+0x00(SB)/8, $0x4040404040408080
DATA expandAVX512_10_mat1<>+0x08(SB)/8, $0x8080808080808080
DATA expandAVX512_10_mat1<>+0x10(SB)/8, $0x0808080808080808
DATA expandAVX512_10_mat1<>+0x18(SB)/8, $0x1010101010101010
DATA expandAVX512_10_mat1<>+0x20(SB)/8, $0x1010202020202020
DATA expandAVX512_10_mat1<>+0x28(SB)/8, $0x2020202040404040
DATA expandAVX512_10_mat1<>+0x30(SB)/8, $0x4040404040408080
DATA expandAVX512_10_mat1<>+0x38(SB)/8, $0x8080808080808080

// Byte indices for the third input gather (VPERMB).
GLOBL expandAVX512_10_inShuf2<>(SB), RODATA, $0x40
DATA expandAVX512_10_inShuf2<>+0x00(SB)/8, $0xffff0c0b0a090807
DATA expandAVX512_10_inShuf2<>+0x08(SB)/8, $0xffff0c0b0a090807
DATA expandAVX512_10_inShuf2<>+0x10(SB)/8, $0xffff0c0b0a090807
DATA expandAVX512_10_inShuf2<>+0x18(SB)/8, $0xffff0c0b0a090807
DATA expandAVX512_10_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
DATA expandAVX512_10_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
DATA expandAVX512_10_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
DATA expandAVX512_10_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff

// Per-lane 8x8 bit matrices applied to the third gather. The last four lanes
// are all-zero matrices.
GLOBL expandAVX512_10_mat2<>(SB), RODATA, $0x40
DATA expandAVX512_10_mat2<>+0x00(SB)/8, $0x0101010101010101
DATA expandAVX512_10_mat2<>+0x08(SB)/8, $0x0101020202020202
DATA expandAVX512_10_mat2<>+0x10(SB)/8, $0x0202020204040404
DATA expandAVX512_10_mat2<>+0x18(SB)/8, $0x0404040404040808
DATA expandAVX512_10_mat2<>+0x20(SB)/8, $0x0000000000000000
DATA expandAVX512_10_mat2<>+0x28(SB)/8, $0x0000000000000000
DATA expandAVX512_10_mat2<>+0x30(SB)/8, $0x0000000000000000
DATA expandAVX512_10_mat2<>+0x38(SB)/8, $0x0000000000000000

// Two-source byte indices (VPERMI2B) producing the first result register.
// NOTE(review): this symbol and outShufHi have no <> suffix and are therefore
// not file-local, unlike the tables above; confirm whether that is
// intentional.
GLOBL expandAVX512_10_outShufLo(SB), RODATA, $0x40
DATA expandAVX512_10_outShufLo+0x00(SB)/8, $0x3830282018100800
DATA expandAVX512_10_outShufLo+0x08(SB)/8, $0x2921191109014840
DATA expandAVX512_10_outShufLo+0x10(SB)/8, $0x1a120a0249413931
DATA expandAVX512_10_outShufLo+0x18(SB)/8, $0x0b034a423a322a22
DATA expandAVX512_10_outShufLo+0x20(SB)/8, $0x4b433b332b231b13
DATA expandAVX512_10_outShufLo+0x28(SB)/8, $0x3c342c241c140c04
DATA expandAVX512_10_outShufLo+0x30(SB)/8, $0x2d251d150d054c44
DATA expandAVX512_10_outShufLo+0x38(SB)/8, $0x1e160e064d453d35

// Two-source byte indices (VPERMI2B) producing the second result register.
GLOBL expandAVX512_10_outShufHi(SB), RODATA, $0x40
DATA expandAVX512_10_outShufHi+0x00(SB)/8, $0x4840383028201810
DATA expandAVX512_10_outShufHi+0x08(SB)/8, $0x3931292119115850
DATA expandAVX512_10_outShufHi+0x10(SB)/8, $0x2a221a1259514941
DATA expandAVX512_10_outShufHi+0x18(SB)/8, $0x1b135a524a423a32
DATA expandAVX512_10_outShufHi+0x20(SB)/8, $0x5b534b433b332b23
DATA expandAVX512_10_outShufHi+0x28(SB)/8, $0x4c443c342c241c14
DATA expandAVX512_10_outShufHi+0x30(SB)/8, $0x3d352d251d155c54
DATA expandAVX512_10_outShufHi+0x38(SB)/8, $0x2e261e165d554d45

// expandAVX512_10 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
//
// Three intermediates are built: VPERMB gathers input bytes by inShufN, then
// VGF2P8AFFINEQB applies the per-lane 8x8 bit matrices of matN, read straight
// from memory (immediate $0 adds nothing). VPERMI2B then picks bytes from a
// pair of intermediates, overwriting its index register with the result:
//   Z1 = bytes of (Z0, Z3) selected by outShufLo
//   Z2 = bytes of (Z3, Z4) selected by outShufHi
//
// Reads AX; overwrites Z0-Z5. NOSPLIT, no frame, no stack arguments.
// NOTE(review): AX-in / Z1:Z2-out is inferred from this body; confirm with
// the caller.
TEXT expandAVX512_10<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_10_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_10_inShuf1<>(SB), Z3
	VMOVDQU64 expandAVX512_10_inShuf2<>(SB), Z4
	VMOVDQU64 expandAVX512_10_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_10_outShufHi(SB), Z2
	VMOVDQU64 (AX), Z5                  // Z5 = 64 input bytes
	VPERMB Z5, Z0, Z0                   // first intermediate
	VGF2P8AFFINEQB $0, expandAVX512_10_mat0<>(SB), Z0, Z0
	VPERMB Z5, Z3, Z3                   // second intermediate
	VGF2P8AFFINEQB $0, expandAVX512_10_mat1<>(SB), Z3, Z3
	VPERMB Z5, Z4, Z4                   // third intermediate
	VGF2P8AFFINEQB $0, expandAVX512_10_mat2<>(SB), Z4, Z4
	VPERMI2B Z3, Z0, Z1                 // Z1 = select from Z0 (first) and Z3 (second)
	VPERMI2B Z4, Z3, Z2                 // Z2 = select from Z3 (first) and Z4 (second)
	RET

// Constant tables and code for expandAVX512_12.
//
// Each table is 64 bytes (one ZMM register) stored as eight little-endian
// quadwords. Each of the three gathers has its own matrix table
// (mat0, mat1, mat2).
// NOTE(review): judging by the name, the routine replicates every input bit
// 12 times; confirm against mkasm.go.

// Byte indices for the first input gather (VPERMB).
GLOBL expandAVX512_12_inShuf0<>(SB), RODATA, $0x40
DATA expandAVX512_12_inShuf0<>+0x00(SB)/8, $0xffff050403020100
DATA expandAVX512_12_inShuf0<>+0x08(SB)/8, $0xffff050403020100
DATA expandAVX512_12_inShuf0<>+0x10(SB)/8, $0xffff050403020100
DATA expandAVX512_12_inShuf0<>+0x18(SB)/8, $0xffff050403020100
DATA expandAVX512_12_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
DATA expandAVX512_12_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
DATA expandAVX512_12_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
DATA expandAVX512_12_inShuf0<>+0x38(SB)/8, $0xffffff0403020100

// Per-lane 8x8 bit matrices applied to the first gather.
GLOBL expandAVX512_12_mat0<>(SB), RODATA, $0x40
DATA expandAVX512_12_mat0<>+0x00(SB)/8, $0x0101010101010101
DATA expandAVX512_12_mat0<>+0x08(SB)/8, $0x0101010102020202
DATA expandAVX512_12_mat0<>+0x10(SB)/8, $0x0202020202020202
DATA expandAVX512_12_mat0<>+0x18(SB)/8, $0x0404040404040404
DATA expandAVX512_12_mat0<>+0x20(SB)/8, $0x0404040408080808
DATA expandAVX512_12_mat0<>+0x28(SB)/8, $0x0808080808080808
DATA expandAVX512_12_mat0<>+0x30(SB)/8, $0x1010101010101010
DATA expandAVX512_12_mat0<>+0x38(SB)/8, $0x1010101020202020

// Byte indices for the second input gather (VPERMB).
GLOBL expandAVX512_12_inShuf1<>(SB), RODATA, $0x40
DATA expandAVX512_12_inShuf1<>+0x00(SB)/8, $0xffffff0403020100
DATA expandAVX512_12_inShuf1<>+0x08(SB)/8, $0xffffff0403020100
DATA expandAVX512_12_inShuf1<>+0x10(SB)/8, $0xffffff0403020100
DATA expandAVX512_12_inShuf1<>+0x18(SB)/8, $0xffffff0403020100
DATA expandAVX512_12_inShuf1<>+0x20(SB)/8, $0xffff0a0908070605
DATA expandAVX512_12_inShuf1<>+0x28(SB)/8, $0xffff0a0908070605
DATA expandAVX512_12_inShuf1<>+0x30(SB)/8, $0xffff0a0908070605
DATA expandAVX512_12_inShuf1<>+0x38(SB)/8, $0xffff0a0908070605

// Per-lane 8x8 bit matrices applied to the second gather.
GLOBL expandAVX512_12_mat1<>(SB), RODATA, $0x40
DATA expandAVX512_12_mat1<>+0x00(SB)/8, $0x2020202020202020
DATA expandAVX512_12_mat1<>+0x08(SB)/8, $0x4040404040404040
DATA expandAVX512_12_mat1<>+0x10(SB)/8, $0x4040404080808080
DATA expandAVX512_12_mat1<>+0x18(SB)/8, $0x8080808080808080
DATA expandAVX512_12_mat1<>+0x20(SB)/8, $0x0404040408080808
DATA expandAVX512_12_mat1<>+0x28(SB)/8, $0x0808080808080808
DATA expandAVX512_12_mat1<>+0x30(SB)/8, $0x1010101010101010
DATA expandAVX512_12_mat1<>+0x38(SB)/8, $0x1010101020202020

// Byte indices for the third input gather (VPERMB).
GLOBL expandAVX512_12_inShuf2<>(SB), RODATA, $0x40
DATA expandAVX512_12_inShuf2<>+0x00(SB)/8, $0xffffff0908070605
DATA expandAVX512_12_inShuf2<>+0x08(SB)/8, $0xffffff0908070605
DATA expandAVX512_12_inShuf2<>+0x10(SB)/8, $0xffffff0908070605
DATA expandAVX512_12_inShuf2<>+0x18(SB)/8, $0xffffff0908070605
DATA expandAVX512_12_inShuf2<>+0x20(SB)/8, $0xffffff0a09080706
DATA expandAVX512_12_inShuf2<>+0x28(SB)/8, $0xffffff0a09080706
DATA expandAVX512_12_inShuf2<>+0x30(SB)/8, $0xffffff0a09080706
DATA expandAVX512_12_inShuf2<>+0x38(SB)/8, $0xffffff0a09080706

// Per-lane 8x8 bit matrices applied to the third gather.
GLOBL expandAVX512_12_mat2<>(SB), RODATA, $0x40
DATA expandAVX512_12_mat2<>+0x00(SB)/8, $0x2020202020202020
DATA expandAVX512_12_mat2<>+0x08(SB)/8, $0x4040404040404040
DATA expandAVX512_12_mat2<>+0x10(SB)/8, $0x4040404080808080
DATA expandAVX512_12_mat2<>+0x18(SB)/8, $0x8080808080808080
DATA expandAVX512_12_mat2<>+0x20(SB)/8, $0x0101010101010101
DATA expandAVX512_12_mat2<>+0x28(SB)/8, $0x0101010102020202
DATA expandAVX512_12_mat2<>+0x30(SB)/8, $0x0202020202020202
DATA expandAVX512_12_mat2<>+0x38(SB)/8, $0x0404040404040404

// Two-source byte indices (VPERMI2B) producing the first result register.
// NOTE(review): this symbol and outShufHi have no <> suffix and are therefore
// not file-local, unlike the tables above; confirm whether that is
// intentional.
GLOBL expandAVX512_12_outShufLo(SB), RODATA, $0x40
DATA expandAVX512_12_outShufLo+0x00(SB)/8, $0x3830282018100800
DATA expandAVX512_12_outShufLo+0x08(SB)/8, $0x1911090158504840
DATA expandAVX512_12_outShufLo+0x10(SB)/8, $0x5951494139312921
DATA expandAVX512_12_outShufLo+0x18(SB)/8, $0x3a322a221a120a02
DATA expandAVX512_12_outShufLo+0x20(SB)/8, $0x1b130b035a524a42
DATA expandAVX512_12_outShufLo+0x28(SB)/8, $0x5b534b433b332b23
DATA expandAVX512_12_outShufLo+0x30(SB)/8, $0x3c342c241c140c04
DATA expandAVX512_12_outShufLo+0x38(SB)/8, $0x1d150d055c544c44

// Two-source byte indices (VPERMI2B) producing the second result register.
GLOBL expandAVX512_12_outShufHi(SB), RODATA, $0x40
DATA expandAVX512_12_outShufHi+0x00(SB)/8, $0x5850484038302820
DATA expandAVX512_12_outShufHi+0x08(SB)/8, $0x3931292178706860
DATA expandAVX512_12_outShufHi+0x10(SB)/8, $0x7971696159514941
DATA expandAVX512_12_outShufHi+0x18(SB)/8, $0x5a524a423a322a22
DATA expandAVX512_12_outShufHi+0x20(SB)/8, $0x3b332b237a726a62
DATA expandAVX512_12_outShufHi+0x28(SB)/8, $0x7b736b635b534b43
DATA expandAVX512_12_outShufHi+0x30(SB)/8, $0x5c544c443c342c24
DATA expandAVX512_12_outShufHi+0x38(SB)/8, $0x3d352d257c746c64

// expandAVX512_12 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
//
// Three intermediates are built: VPERMB gathers input bytes by inShufN, then
// VGF2P8AFFINEQB applies the per-lane 8x8 bit matrices of matN, read straight
// from memory (immediate $0 adds nothing). VPERMI2B then picks bytes from a
// pair of intermediates, overwriting its index register with the result:
//   Z1 = bytes of (Z0, Z3) selected by outShufLo
//   Z2 = bytes of (Z3, Z4) selected by outShufHi
//
// Reads AX; overwrites Z0-Z5. NOSPLIT, no frame, no stack arguments.
// NOTE(review): AX-in / Z1:Z2-out is inferred from this body; confirm with
// the caller.
TEXT expandAVX512_12<>(SB), NOSPLIT, $0-0
	VMOVDQU64 expandAVX512_12_inShuf0<>(SB), Z0
	VMOVDQU64 expandAVX512_12_inShuf1<>(SB), Z3
	VMOVDQU64 expandAVX512_12_inShuf2<>(SB), Z4
	VMOVDQU64 expandAVX512_12_outShufLo(SB), Z1
	VMOVDQU64 expandAVX512_12_outShufHi(SB), Z2
	VMOVDQU64 (AX), Z5                  // Z5 = 64 input bytes
	VPERMB Z5, Z0, Z0                   // first intermediate
	VGF2P8AFFINEQB $0, expandAVX512_12_mat0<>(SB), Z0, Z0
	VPERMB Z5, Z3, Z3                   // second intermediate
	VGF2P8AFFINEQB $0, expandAVX512_12_mat1<>(SB), Z3, Z3
	VPERMB Z5, Z4, Z4                   // third intermediate
	VGF2P8AFFINEQB $0, expandAVX512_12_mat2<>(SB), Z4, Z4
	VPERMI2B Z3, Z0, Z1                 // Z1 = select from Z0 (first) and Z3 (second)
	VPERMI2B Z4, Z3, Z2                 // Z2 = select from Z3 (first) and Z4 (second)
	RET

593GLOBL expandAVX512_14_inShuf0<>(SB), RODATA, $0x40
594DATA expandAVX512_14_inShuf0<>+0x00(SB)/8, $0xffffff0403020100
595DATA expandAVX512_14_inShuf0<>+0x08(SB)/8, $0xffffff0403020100
596DATA expandAVX512_14_inShuf0<>+0x10(SB)/8, $0xffffff0403020100
597DATA expandAVX512_14_inShuf0<>+0x18(SB)/8, $0xffffff0403020100
598DATA expandAVX512_14_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
599DATA expandAVX512_14_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
600DATA expandAVX512_14_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
601DATA expandAVX512_14_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
602
603GLOBL expandAVX512_14_mat0<>(SB), RODATA, $0x40
604DATA expandAVX512_14_mat0<>+0x00(SB)/8, $0x0101010101010101
605DATA expandAVX512_14_mat0<>+0x08(SB)/8, $0x0101010101010202
606DATA expandAVX512_14_mat0<>+0x10(SB)/8, $0x0202020202020202
607DATA expandAVX512_14_mat0<>+0x18(SB)/8, $0x0202020204040404
608DATA expandAVX512_14_mat0<>+0x20(SB)/8, $0x0404040404040404
609DATA expandAVX512_14_mat0<>+0x28(SB)/8, $0x0404080808080808
610DATA expandAVX512_14_mat0<>+0x30(SB)/8, $0x0808080808080808
611DATA expandAVX512_14_mat0<>+0x38(SB)/8, $0x1010101010101010
612
613GLOBL expandAVX512_14_inShuf1<>(SB), RODATA, $0x40
614DATA expandAVX512_14_inShuf1<>+0x00(SB)/8, $0xffffffff03020100
615DATA expandAVX512_14_inShuf1<>+0x08(SB)/8, $0xffffffff03020100
616DATA expandAVX512_14_inShuf1<>+0x10(SB)/8, $0xffffffff03020100
617DATA expandAVX512_14_inShuf1<>+0x18(SB)/8, $0xffffffff03020100
618DATA expandAVX512_14_inShuf1<>+0x20(SB)/8, $0xffffffff03020100
619DATA expandAVX512_14_inShuf1<>+0x28(SB)/8, $0xffffffff03020100
620DATA expandAVX512_14_inShuf1<>+0x30(SB)/8, $0xffffff0807060504
621DATA expandAVX512_14_inShuf1<>+0x38(SB)/8, $0xffffff0807060504
622
// expandAVX512_14_mat1 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf1 gather (Z2). Every matrix byte has one bit set.
623GLOBL expandAVX512_14_mat1<>(SB), RODATA, $0x40
624DATA expandAVX512_14_mat1<>+0x00(SB)/8, $0x1010101010102020
625DATA expandAVX512_14_mat1<>+0x08(SB)/8, $0x2020202020202020
626DATA expandAVX512_14_mat1<>+0x10(SB)/8, $0x2020202040404040
627DATA expandAVX512_14_mat1<>+0x18(SB)/8, $0x4040404040404040
628DATA expandAVX512_14_mat1<>+0x20(SB)/8, $0x4040808080808080
629DATA expandAVX512_14_mat1<>+0x28(SB)/8, $0x8080808080808080
630DATA expandAVX512_14_mat1<>+0x30(SB)/8, $0x1010101010102020
631DATA expandAVX512_14_mat1<>+0x38(SB)/8, $0x2020202020202020
632
// expandAVX512_14_inShuf2 is the VPERMB index vector loaded into Z3.
// Lanes 0-3 gather input bytes 4-8, lanes 4-5 bytes 5-9, lanes 6-7 bytes 5-8.
633GLOBL expandAVX512_14_inShuf2<>(SB), RODATA, $0x40
634DATA expandAVX512_14_inShuf2<>+0x00(SB)/8, $0xffffff0807060504
635DATA expandAVX512_14_inShuf2<>+0x08(SB)/8, $0xffffff0807060504
636DATA expandAVX512_14_inShuf2<>+0x10(SB)/8, $0xffffff0807060504
637DATA expandAVX512_14_inShuf2<>+0x18(SB)/8, $0xffffff0807060504
638DATA expandAVX512_14_inShuf2<>+0x20(SB)/8, $0xffffff0908070605
639DATA expandAVX512_14_inShuf2<>+0x28(SB)/8, $0xffffff0908070605
640DATA expandAVX512_14_inShuf2<>+0x30(SB)/8, $0xffffffff08070605
641DATA expandAVX512_14_inShuf2<>+0x38(SB)/8, $0xffffffff08070605
642
// expandAVX512_14_mat2 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf2 gather (Z3). Every matrix byte has one bit set.
643GLOBL expandAVX512_14_mat2<>(SB), RODATA, $0x40
644DATA expandAVX512_14_mat2<>+0x00(SB)/8, $0x2020202040404040
645DATA expandAVX512_14_mat2<>+0x08(SB)/8, $0x4040404040404040
646DATA expandAVX512_14_mat2<>+0x10(SB)/8, $0x4040808080808080
647DATA expandAVX512_14_mat2<>+0x18(SB)/8, $0x8080808080808080
648DATA expandAVX512_14_mat2<>+0x20(SB)/8, $0x0101010101010101
649DATA expandAVX512_14_mat2<>+0x28(SB)/8, $0x0101010101010202
650DATA expandAVX512_14_mat2<>+0x30(SB)/8, $0x0202020202020202
651DATA expandAVX512_14_mat2<>+0x38(SB)/8, $0x0202020204040404
652
// expandAVX512_14_inShuf3 is the VPERMB index vector loaded into Z4.
// Only lanes 0-3 carry real indices (input bytes 5-8); lanes 4-7 are all 0xff
// and are paired with all-zero matrices in expandAVX512_14_mat3.
653GLOBL expandAVX512_14_inShuf3<>(SB), RODATA, $0x40
654DATA expandAVX512_14_inShuf3<>+0x00(SB)/8, $0xffffffff08070605
655DATA expandAVX512_14_inShuf3<>+0x08(SB)/8, $0xffffffff08070605
656DATA expandAVX512_14_inShuf3<>+0x10(SB)/8, $0xffffffff08070605
657DATA expandAVX512_14_inShuf3<>+0x18(SB)/8, $0xffffffff08070605
658DATA expandAVX512_14_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
659DATA expandAVX512_14_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
660DATA expandAVX512_14_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
661DATA expandAVX512_14_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
662
// expandAVX512_14_mat3 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf3 gather (Z4). Lanes 4-7 are zero matrices, so with the
// $0 immediate those lanes of the result are zero.
663GLOBL expandAVX512_14_mat3<>(SB), RODATA, $0x40
664DATA expandAVX512_14_mat3<>+0x00(SB)/8, $0x0404040404040404
665DATA expandAVX512_14_mat3<>+0x08(SB)/8, $0x0404080808080808
666DATA expandAVX512_14_mat3<>+0x10(SB)/8, $0x0808080808080808
667DATA expandAVX512_14_mat3<>+0x18(SB)/8, $0x1010101010101010
668DATA expandAVX512_14_mat3<>+0x20(SB)/8, $0x0000000000000000
669DATA expandAVX512_14_mat3<>+0x28(SB)/8, $0x0000000000000000
670DATA expandAVX512_14_mat3<>+0x30(SB)/8, $0x0000000000000000
671DATA expandAVX512_14_mat3<>+0x38(SB)/8, $0x0000000000000000
672
// expandAVX512_14_outShufLo is the VPERMI2B index vector (loaded into Z1) that
// picks the low 64 result bytes out of the Z0/Z2 register pair.
// NOTE(review): unlike the inShuf/mat tables this symbol has no "<>" suffix.
673GLOBL expandAVX512_14_outShufLo(SB), RODATA, $0x40
674DATA expandAVX512_14_outShufLo+0x00(SB)/8, $0x3830282018100800
675DATA expandAVX512_14_outShufLo+0x08(SB)/8, $0x0901686058504840
676DATA expandAVX512_14_outShufLo+0x10(SB)/8, $0x4941393129211911
677DATA expandAVX512_14_outShufLo+0x18(SB)/8, $0x1a120a0269615951
678DATA expandAVX512_14_outShufLo+0x20(SB)/8, $0x5a524a423a322a22
679DATA expandAVX512_14_outShufLo+0x28(SB)/8, $0x2b231b130b036a62
680DATA expandAVX512_14_outShufLo+0x30(SB)/8, $0x6b635b534b433b33
681DATA expandAVX512_14_outShufLo+0x38(SB)/8, $0x3c342c241c140c04
682
// expandAVX512_14_outShufHi0 is the VPERMI2B index vector (loaded into Z5) that
// picks high-half result bytes out of the Z2/Z3 pair. Positions holding 0xff
// are masked off by K1 in expandAVX512_14 and filled from outShufHi1 instead.
683GLOBL expandAVX512_14_outShufHi0(SB), RODATA, $0x40
684DATA expandAVX512_14_outShufHi0+0x00(SB)/8, $0x6860585048403830
685DATA expandAVX512_14_outShufHi0+0x08(SB)/8, $0x3931ffffffff7870
686DATA expandAVX512_14_outShufHi0+0x10(SB)/8, $0x7971696159514941
687DATA expandAVX512_14_outShufHi0+0x18(SB)/8, $0x4a423a32ffffffff
688DATA expandAVX512_14_outShufHi0+0x20(SB)/8, $0xffff7a726a625a52
689DATA expandAVX512_14_outShufHi0+0x28(SB)/8, $0x5b534b433b33ffff
690DATA expandAVX512_14_outShufHi0+0x30(SB)/8, $0xffffffff7b736b63
691DATA expandAVX512_14_outShufHi0+0x38(SB)/8, $0x6c645c544c443c34
692
// expandAVX512_14_outShufHi1 is the VPERMB index vector (loaded into Z6) that
// picks the remaining high-half result bytes out of Z4. Its non-0xff positions
// are exactly the positions where outShufHi0 holds 0xff.
693GLOBL expandAVX512_14_outShufHi1(SB), RODATA, $0x40
694DATA expandAVX512_14_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
695DATA expandAVX512_14_outShufHi1+0x08(SB)/8, $0xffff18100800ffff
696DATA expandAVX512_14_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
697DATA expandAVX512_14_outShufHi1+0x18(SB)/8, $0xffffffff19110901
698DATA expandAVX512_14_outShufHi1+0x20(SB)/8, $0x0a02ffffffffffff
699DATA expandAVX512_14_outShufHi1+0x28(SB)/8, $0xffffffffffff1a12
700DATA expandAVX512_14_outShufHi1+0x30(SB)/8, $0x1b130b03ffffffff
701DATA expandAVX512_14_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
702
// expandAVX512_14 loads 64 bytes from the address in AX and leaves two 64-byte
// results in Z1 (low half) and Z2 (high half). It has no stack frame and no
// Go-visible arguments ($0-0), so the register contract is implicit.
// Judging by the tables, each input bit is replicated 14 times in the output
// — TODO confirm against mkasm.go.
// Overwrites AX, K1 and Z0-Z7.
703TEXT expandAVX512_14<>(SB), NOSPLIT, $0-0
	// Load the four input-gather index vectors and the three output shuffles.
704 VMOVDQU64 expandAVX512_14_inShuf0<>(SB), Z0
705 VMOVDQU64 expandAVX512_14_inShuf1<>(SB), Z2
706 VMOVDQU64 expandAVX512_14_inShuf2<>(SB), Z3
707 VMOVDQU64 expandAVX512_14_inShuf3<>(SB), Z4
708 VMOVDQU64 expandAVX512_14_outShufLo(SB), Z1
709 VMOVDQU64 expandAVX512_14_outShufHi0(SB), Z5
710 VMOVDQU64 expandAVX512_14_outShufHi1(SB), Z6
	// Z7 = the 64 input bytes.
711 VMOVDQU64 (AX), Z7
	// For each of Z0, Z2, Z3, Z4: gather input bytes with VPERMB, then spread
	// their bits with the matching per-lane bit matrices.
712 VPERMB Z7, Z0, Z0
713 VGF2P8AFFINEQB $0, expandAVX512_14_mat0<>(SB), Z0, Z0
714 VPERMB Z7, Z2, Z2
715 VGF2P8AFFINEQB $0, expandAVX512_14_mat1<>(SB), Z2, Z2
716 VPERMB Z7, Z3, Z3
717 VGF2P8AFFINEQB $0, expandAVX512_14_mat2<>(SB), Z3, Z3
718 VPERMB Z7, Z4, Z4
719 VGF2P8AFFINEQB $0, expandAVX512_14_mat3<>(SB), Z4, Z4
	// Low half: Z1 (outShufLo indices) is replaced by bytes picked from Z0/Z2.
720 VPERMI2B Z2, Z0, Z1
	// High half, part 1: K1 has a bit set for every byte whose outShufHi0 index
	// is not 0xff; .Z zeroes the other bytes of Z5. AX is reused as scratch.
721 MOVQ $0xff0ffc3ff0ffc3ff, AX
722 KMOVQ AX, K1
723 VPERMI2B.Z Z3, Z2, K1, Z5
	// High half, part 2: the complementary mask selects the bytes taken from Z4
	// through outShufHi1; the result lands in Z0, zero elsewhere.
724 MOVQ $0xf003c00f003c00, AX
725 KMOVQ AX, K1
726 VPERMB.Z Z4, Z6, K1, Z0
	// The two parts occupy disjoint bytes, so OR merges them into Z2.
727 VPORQ Z0, Z5, Z2
728 RET
729
// expandAVX512_16_inShuf0 is the VPERMB index vector that expandAVX512_16 loads
// into Z0. Every lane fetches input bytes 0-3, each one twice in a row.
730GLOBL expandAVX512_16_inShuf0<>(SB), RODATA, $0x40
731DATA expandAVX512_16_inShuf0<>+0x00(SB)/8, $0x0303020201010000
732DATA expandAVX512_16_inShuf0<>+0x08(SB)/8, $0x0303020201010000
733DATA expandAVX512_16_inShuf0<>+0x10(SB)/8, $0x0303020201010000
734DATA expandAVX512_16_inShuf0<>+0x18(SB)/8, $0x0303020201010000
735DATA expandAVX512_16_inShuf0<>+0x20(SB)/8, $0x0303020201010000
736DATA expandAVX512_16_inShuf0<>+0x28(SB)/8, $0x0303020201010000
737DATA expandAVX512_16_inShuf0<>+0x30(SB)/8, $0x0303020201010000
738DATA expandAVX512_16_inShuf0<>+0x38(SB)/8, $0x0303020201010000
739
// expandAVX512_16_mat0 holds the per-lane bit matrices for VGF2P8AFFINEQB.
// Lane k's matrix is the byte (1<<k) repeated eight times. expandAVX512_16
// loads it once into Z1 and uses it for both gathers.
740GLOBL expandAVX512_16_mat0<>(SB), RODATA, $0x40
741DATA expandAVX512_16_mat0<>+0x00(SB)/8, $0x0101010101010101
742DATA expandAVX512_16_mat0<>+0x08(SB)/8, $0x0202020202020202
743DATA expandAVX512_16_mat0<>+0x10(SB)/8, $0x0404040404040404
744DATA expandAVX512_16_mat0<>+0x18(SB)/8, $0x0808080808080808
745DATA expandAVX512_16_mat0<>+0x20(SB)/8, $0x1010101010101010
746DATA expandAVX512_16_mat0<>+0x28(SB)/8, $0x2020202020202020
747DATA expandAVX512_16_mat0<>+0x30(SB)/8, $0x4040404040404040
748DATA expandAVX512_16_mat0<>+0x38(SB)/8, $0x8080808080808080
749
// expandAVX512_16_inShuf1 is the VPERMB index vector loaded into Z2.
// Every lane fetches input bytes 4-7, each one twice in a row.
750GLOBL expandAVX512_16_inShuf1<>(SB), RODATA, $0x40
751DATA expandAVX512_16_inShuf1<>+0x00(SB)/8, $0x0707060605050404
752DATA expandAVX512_16_inShuf1<>+0x08(SB)/8, $0x0707060605050404
753DATA expandAVX512_16_inShuf1<>+0x10(SB)/8, $0x0707060605050404
754DATA expandAVX512_16_inShuf1<>+0x18(SB)/8, $0x0707060605050404
755DATA expandAVX512_16_inShuf1<>+0x20(SB)/8, $0x0707060605050404
756DATA expandAVX512_16_inShuf1<>+0x28(SB)/8, $0x0707060605050404
757DATA expandAVX512_16_inShuf1<>+0x30(SB)/8, $0x0707060605050404
758DATA expandAVX512_16_inShuf1<>+0x38(SB)/8, $0x0707060605050404
759
// expandAVX512_16_outShufLo is the VPERMB index vector (loaded into Z3) that
// reorders each 64-byte intermediate into output order. expandAVX512_16 applies
// it to both intermediates.
// NOTE(review): unlike the inShuf/mat tables this symbol has no "<>" suffix.
760GLOBL expandAVX512_16_outShufLo(SB), RODATA, $0x40
761DATA expandAVX512_16_outShufLo+0x00(SB)/8, $0x1918111009080100
762DATA expandAVX512_16_outShufLo+0x08(SB)/8, $0x3938313029282120
763DATA expandAVX512_16_outShufLo+0x10(SB)/8, $0x1b1a13120b0a0302
764DATA expandAVX512_16_outShufLo+0x18(SB)/8, $0x3b3a33322b2a2322
765DATA expandAVX512_16_outShufLo+0x20(SB)/8, $0x1d1c15140d0c0504
766DATA expandAVX512_16_outShufLo+0x28(SB)/8, $0x3d3c35342d2c2524
767DATA expandAVX512_16_outShufLo+0x30(SB)/8, $0x1f1e17160f0e0706
768DATA expandAVX512_16_outShufLo+0x38(SB)/8, $0x3f3e37362f2e2726
769
// expandAVX512_16 loads 64 bytes from the address in AX and leaves two 64-byte
// results in Z1 (built from input bytes 0-3) and Z2 (built from input bytes
// 4-7). It has no stack frame and no Go-visible arguments ($0-0).
// Judging by the tables, each input bit is replicated 16 times in the output
// — TODO confirm against mkasm.go.
// Overwrites Z0-Z4; AX is only read.
770TEXT expandAVX512_16<>(SB), NOSPLIT, $0-0
	// Load the two gather index vectors, the shared bit matrices and the
	// shared output shuffle.
771 VMOVDQU64 expandAVX512_16_inShuf0<>(SB), Z0
772 VMOVDQU64 expandAVX512_16_mat0<>(SB), Z1
773 VMOVDQU64 expandAVX512_16_inShuf1<>(SB), Z2
774 VMOVDQU64 expandAVX512_16_outShufLo(SB), Z3
	// Z4 = the 64 input bytes.
775 VMOVDQU64 (AX), Z4
	// Gather input bytes, then spread their bits with the matrices in Z1.
776 VPERMB Z4, Z0, Z0
777 VGF2P8AFFINEQB $0, Z1, Z0, Z0
778 VPERMB Z4, Z2, Z2
779 VGF2P8AFFINEQB $0, Z1, Z2, Z2
	// Reorder both intermediates with the same index vector. Z1 is free to be
	// overwritten here because the matrices are no longer needed.
780 VPERMB Z0, Z3, Z1
781 VPERMB Z2, Z3, Z2
782 RET
783
// expandAVX512_18_inShuf0 is the VPERMB index vector that expandAVX512_18 loads
// into Z0. Lanes with doubled indices fetch the same input byte twice;
// 0xff entries are presumably don't-care positions.
784GLOBL expandAVX512_18_inShuf0<>(SB), RODATA, $0x40
785DATA expandAVX512_18_inShuf0<>+0x00(SB)/8, $0x0303020201010000
786DATA expandAVX512_18_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
787DATA expandAVX512_18_inShuf0<>+0x10(SB)/8, $0xffffffff03020100
788DATA expandAVX512_18_inShuf0<>+0x18(SB)/8, $0xffffffff03020100
789DATA expandAVX512_18_inShuf0<>+0x20(SB)/8, $0xffffffff03020100
790DATA expandAVX512_18_inShuf0<>+0x28(SB)/8, $0xffffffff03020100
791DATA expandAVX512_18_inShuf0<>+0x30(SB)/8, $0x0303020201010000
792DATA expandAVX512_18_inShuf0<>+0x38(SB)/8, $0xff03020201010000
793
// expandAVX512_18_mat0 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf0 gather (Z0). Every matrix byte has one bit set.
794GLOBL expandAVX512_18_mat0<>(SB), RODATA, $0x40
795DATA expandAVX512_18_mat0<>+0x00(SB)/8, $0x0101010101010101
796DATA expandAVX512_18_mat0<>+0x08(SB)/8, $0x0101020202020202
797DATA expandAVX512_18_mat0<>+0x10(SB)/8, $0x0202020202020202
798DATA expandAVX512_18_mat0<>+0x18(SB)/8, $0x0202020204040404
799DATA expandAVX512_18_mat0<>+0x20(SB)/8, $0x0404040404040404
800DATA expandAVX512_18_mat0<>+0x28(SB)/8, $0x0404040404040808
801DATA expandAVX512_18_mat0<>+0x30(SB)/8, $0x0808080808080808
802DATA expandAVX512_18_mat0<>+0x38(SB)/8, $0x1010101010101010
803
// expandAVX512_18_inShuf1 is the VPERMB index vector loaded into Z2.
// Lanes 0-5 gather from input bytes 0-2; lanes 6-7 gather from bytes 3-6.
804GLOBL expandAVX512_18_inShuf1<>(SB), RODATA, $0x40
805DATA expandAVX512_18_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
806DATA expandAVX512_18_inShuf1<>+0x08(SB)/8, $0xffffffffff020100
807DATA expandAVX512_18_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
808DATA expandAVX512_18_inShuf1<>+0x18(SB)/8, $0xffffffffff020100
809DATA expandAVX512_18_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
810DATA expandAVX512_18_inShuf1<>+0x28(SB)/8, $0xffff020201010000
811DATA expandAVX512_18_inShuf1<>+0x30(SB)/8, $0xff06060505040403
812DATA expandAVX512_18_inShuf1<>+0x38(SB)/8, $0xffffffff06050403
813
// expandAVX512_18_mat1 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf1 gather (Z2). Every matrix byte has one bit set.
814GLOBL expandAVX512_18_mat1<>(SB), RODATA, $0x40
815DATA expandAVX512_18_mat1<>+0x00(SB)/8, $0x1010202020202020
816DATA expandAVX512_18_mat1<>+0x08(SB)/8, $0x2020202020202020
817DATA expandAVX512_18_mat1<>+0x10(SB)/8, $0x2020202040404040
818DATA expandAVX512_18_mat1<>+0x18(SB)/8, $0x4040404040404040
819DATA expandAVX512_18_mat1<>+0x20(SB)/8, $0x4040404040408080
820DATA expandAVX512_18_mat1<>+0x28(SB)/8, $0x8080808080808080
821DATA expandAVX512_18_mat1<>+0x30(SB)/8, $0x1010101010101010
822DATA expandAVX512_18_mat1<>+0x38(SB)/8, $0x1010202020202020
823
// expandAVX512_18_inShuf2 is the VPERMB index vector loaded into Z3.
// Its lanes gather from input bytes 3-7.
824GLOBL expandAVX512_18_inShuf2<>(SB), RODATA, $0x40
825DATA expandAVX512_18_inShuf2<>+0x00(SB)/8, $0xffffffff06050403
826DATA expandAVX512_18_inShuf2<>+0x08(SB)/8, $0xffffffff06050403
827DATA expandAVX512_18_inShuf2<>+0x10(SB)/8, $0xffffffff06050403
828DATA expandAVX512_18_inShuf2<>+0x18(SB)/8, $0xffffffff06050403
829DATA expandAVX512_18_inShuf2<>+0x20(SB)/8, $0x0606050504040303
830DATA expandAVX512_18_inShuf2<>+0x28(SB)/8, $0x0707060605050404
831DATA expandAVX512_18_inShuf2<>+0x30(SB)/8, $0xffffffffff060504
832DATA expandAVX512_18_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
833
// expandAVX512_18_mat2 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf2 gather (Z3). Every matrix byte has one bit set.
834GLOBL expandAVX512_18_mat2<>(SB), RODATA, $0x40
835DATA expandAVX512_18_mat2<>+0x00(SB)/8, $0x2020202020202020
836DATA expandAVX512_18_mat2<>+0x08(SB)/8, $0x2020202040404040
837DATA expandAVX512_18_mat2<>+0x10(SB)/8, $0x4040404040404040
838DATA expandAVX512_18_mat2<>+0x18(SB)/8, $0x4040404040408080
839DATA expandAVX512_18_mat2<>+0x20(SB)/8, $0x8080808080808080
840DATA expandAVX512_18_mat2<>+0x28(SB)/8, $0x0101010101010101
841DATA expandAVX512_18_mat2<>+0x30(SB)/8, $0x0101020202020202
842DATA expandAVX512_18_mat2<>+0x38(SB)/8, $0x0202020202020202
843
// expandAVX512_18_inShuf3 is the VPERMB index vector loaded into Z4.
// Only lanes 0-3 carry real indices (input bytes 4-6); lanes 4-7 are all 0xff
// and are paired with all-zero matrices in expandAVX512_18_mat3.
844GLOBL expandAVX512_18_inShuf3<>(SB), RODATA, $0x40
845DATA expandAVX512_18_inShuf3<>+0x00(SB)/8, $0xffffffffff060504
846DATA expandAVX512_18_inShuf3<>+0x08(SB)/8, $0xffffffffff060504
847DATA expandAVX512_18_inShuf3<>+0x10(SB)/8, $0xffffffffff060504
848DATA expandAVX512_18_inShuf3<>+0x18(SB)/8, $0xffff060605050404
849DATA expandAVX512_18_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
850DATA expandAVX512_18_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
851DATA expandAVX512_18_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
852DATA expandAVX512_18_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
853
// expandAVX512_18_mat3 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf3 gather (Z4). Lanes 4-7 are zero matrices, so with the
// $0 immediate those lanes of the result are zero.
854GLOBL expandAVX512_18_mat3<>(SB), RODATA, $0x40
855DATA expandAVX512_18_mat3<>+0x00(SB)/8, $0x0202020204040404
856DATA expandAVX512_18_mat3<>+0x08(SB)/8, $0x0404040404040404
857DATA expandAVX512_18_mat3<>+0x10(SB)/8, $0x0404040404040808
858DATA expandAVX512_18_mat3<>+0x18(SB)/8, $0x0808080808080808
859DATA expandAVX512_18_mat3<>+0x20(SB)/8, $0x0000000000000000
860DATA expandAVX512_18_mat3<>+0x28(SB)/8, $0x0000000000000000
861DATA expandAVX512_18_mat3<>+0x30(SB)/8, $0x0000000000000000
862DATA expandAVX512_18_mat3<>+0x38(SB)/8, $0x0000000000000000
863
// expandAVX512_18_outShufLo is the VPERMI2B index vector (loaded into Z1) that
// picks the low 64 result bytes out of the Z0/Z2 register pair.
// NOTE(review): unlike the inShuf/mat tables this symbol has no "<>" suffix.
864GLOBL expandAVX512_18_outShufLo(SB), RODATA, $0x40
865DATA expandAVX512_18_outShufLo+0x00(SB)/8, $0x3028201810080100
866DATA expandAVX512_18_outShufLo+0x08(SB)/8, $0x6058504840393831
867DATA expandAVX512_18_outShufLo+0x10(SB)/8, $0x2119110903026968
868DATA expandAVX512_18_outShufLo+0x18(SB)/8, $0x5149413b3a333229
869DATA expandAVX512_18_outShufLo+0x20(SB)/8, $0x120a05046b6a6159
870DATA expandAVX512_18_outShufLo+0x28(SB)/8, $0x423d3c35342a221a
871DATA expandAVX512_18_outShufLo+0x30(SB)/8, $0x07066d6c625a524a
872DATA expandAVX512_18_outShufLo+0x38(SB)/8, $0x3e37362b231b130b
873
// expandAVX512_18_outShufHi0 is the VPERMI2B index vector (loaded into Z5) that
// picks high-half result bytes out of the Z2/Z3 pair. Positions holding 0xff
// are masked off by K1 in expandAVX512_18 and filled from outShufHi1 instead.
874GLOBL expandAVX512_18_outShufHi0(SB), RODATA, $0x40
875DATA expandAVX512_18_outShufHi0+0x00(SB)/8, $0x6160585048403830
876DATA expandAVX512_18_outShufHi0+0x08(SB)/8, $0xffffffff78706968
877DATA expandAVX512_18_outShufHi0+0x10(SB)/8, $0x59514941393231ff
878DATA expandAVX512_18_outShufHi0+0x18(SB)/8, $0xffff79716b6a6362
879DATA expandAVX512_18_outShufHi0+0x20(SB)/8, $0x4a423a3433ffffff
880DATA expandAVX512_18_outShufHi0+0x28(SB)/8, $0x7a726d6c65645a52
881DATA expandAVX512_18_outShufHi0+0x30(SB)/8, $0x3b3635ffffffffff
882DATA expandAVX512_18_outShufHi0+0x38(SB)/8, $0x6f6e67665b534b43
883
// expandAVX512_18_outShufHi1 is the VPERMB index vector (loaded into Z6) that
// picks the remaining high-half result bytes out of Z4. Its non-0xff positions
// are exactly the positions where outShufHi0 holds 0xff.
884GLOBL expandAVX512_18_outShufHi1(SB), RODATA, $0x40
885DATA expandAVX512_18_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
886DATA expandAVX512_18_outShufHi1+0x08(SB)/8, $0x18100800ffffffff
887DATA expandAVX512_18_outShufHi1+0x10(SB)/8, $0xffffffffffffff19
888DATA expandAVX512_18_outShufHi1+0x18(SB)/8, $0x0901ffffffffffff
889DATA expandAVX512_18_outShufHi1+0x20(SB)/8, $0xffffffffff1b1a11
890DATA expandAVX512_18_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
891DATA expandAVX512_18_outShufHi1+0x30(SB)/8, $0xffffff1d1c120a02
892DATA expandAVX512_18_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
893
// expandAVX512_18 loads 64 bytes from the address in AX and leaves two 64-byte
// results in Z1 (low half) and Z2 (high half). It has no stack frame and no
// Go-visible arguments ($0-0), so the register contract is implicit.
// Judging by the tables, each input bit is replicated 18 times in the output
// — TODO confirm against mkasm.go.
// Overwrites AX, K1 and Z0-Z7.
894TEXT expandAVX512_18<>(SB), NOSPLIT, $0-0
	// Load the four input-gather index vectors and the three output shuffles.
895 VMOVDQU64 expandAVX512_18_inShuf0<>(SB), Z0
896 VMOVDQU64 expandAVX512_18_inShuf1<>(SB), Z2
897 VMOVDQU64 expandAVX512_18_inShuf2<>(SB), Z3
898 VMOVDQU64 expandAVX512_18_inShuf3<>(SB), Z4
899 VMOVDQU64 expandAVX512_18_outShufLo(SB), Z1
900 VMOVDQU64 expandAVX512_18_outShufHi0(SB), Z5
901 VMOVDQU64 expandAVX512_18_outShufHi1(SB), Z6
	// Z7 = the 64 input bytes.
902 VMOVDQU64 (AX), Z7
	// For each of Z0, Z2, Z3, Z4: gather input bytes with VPERMB, then spread
	// their bits with the matching per-lane bit matrices.
903 VPERMB Z7, Z0, Z0
904 VGF2P8AFFINEQB $0, expandAVX512_18_mat0<>(SB), Z0, Z0
905 VPERMB Z7, Z2, Z2
906 VGF2P8AFFINEQB $0, expandAVX512_18_mat1<>(SB), Z2, Z2
907 VPERMB Z7, Z3, Z3
908 VGF2P8AFFINEQB $0, expandAVX512_18_mat2<>(SB), Z3, Z3
909 VPERMB Z7, Z4, Z4
910 VGF2P8AFFINEQB $0, expandAVX512_18_mat3<>(SB), Z4, Z4
	// Low half: Z1 (outShufLo indices) is replaced by bytes picked from Z0/Z2.
911 VPERMI2B Z2, Z0, Z1
	// High half, part 1: K1 has a bit set for every byte whose outShufHi0 index
	// is not 0xff; .Z zeroes the other bytes of Z5. AX is reused as scratch.
912 MOVQ $0xffe0fff83ffe0fff, AX
913 KMOVQ AX, K1
914 VPERMI2B.Z Z3, Z2, K1, Z5
	// High half, part 2: the complementary mask selects the bytes taken from Z4
	// through outShufHi1; the result lands in Z0, zero elsewhere.
915 MOVQ $0x1f0007c001f000, AX
916 KMOVQ AX, K1
917 VPERMB.Z Z4, Z6, K1, Z0
	// The two parts occupy disjoint bytes, so OR merges them into Z2.
918 VPORQ Z0, Z5, Z2
919 RET
920
// expandAVX512_20_inShuf0 is the VPERMB index vector that expandAVX512_20 loads
// into Z0. Its lanes gather from input bytes 0-3; 0xff entries are presumably
// don't-care positions.
921GLOBL expandAVX512_20_inShuf0<>(SB), RODATA, $0x40
922DATA expandAVX512_20_inShuf0<>+0x00(SB)/8, $0x0303020201010000
923DATA expandAVX512_20_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
924DATA expandAVX512_20_inShuf0<>+0x10(SB)/8, $0xff03020201010000
925DATA expandAVX512_20_inShuf0<>+0x18(SB)/8, $0xffff020201010000
926DATA expandAVX512_20_inShuf0<>+0x20(SB)/8, $0xffffffffff020100
927DATA expandAVX512_20_inShuf0<>+0x28(SB)/8, $0xffff020201010000
928DATA expandAVX512_20_inShuf0<>+0x30(SB)/8, $0xffff020201010000
929DATA expandAVX512_20_inShuf0<>+0x38(SB)/8, $0xffffffffff020100
930
// expandAVX512_20_mat0 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf0 gather (Z0). Every matrix byte has one bit set.
931GLOBL expandAVX512_20_mat0<>(SB), RODATA, $0x40
932DATA expandAVX512_20_mat0<>+0x00(SB)/8, $0x0101010101010101
933DATA expandAVX512_20_mat0<>+0x08(SB)/8, $0x0101010102020202
934DATA expandAVX512_20_mat0<>+0x10(SB)/8, $0x0202020202020202
935DATA expandAVX512_20_mat0<>+0x18(SB)/8, $0x0404040404040404
936DATA expandAVX512_20_mat0<>+0x20(SB)/8, $0x0404040408080808
937DATA expandAVX512_20_mat0<>+0x28(SB)/8, $0x0808080808080808
938DATA expandAVX512_20_mat0<>+0x30(SB)/8, $0x1010101010101010
939DATA expandAVX512_20_mat0<>+0x38(SB)/8, $0x1010101020202020
940
// expandAVX512_20_inShuf1 is the VPERMB index vector loaded into Z3.
// Lanes 0-3 gather from input bytes 0-2; lanes 4-7 gather from bytes 3-6.
941GLOBL expandAVX512_20_inShuf1<>(SB), RODATA, $0x40
942DATA expandAVX512_20_inShuf1<>+0x00(SB)/8, $0xffff020201010000
943DATA expandAVX512_20_inShuf1<>+0x08(SB)/8, $0xffff020201010000
944DATA expandAVX512_20_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
945DATA expandAVX512_20_inShuf1<>+0x18(SB)/8, $0xffff020201010000
946DATA expandAVX512_20_inShuf1<>+0x20(SB)/8, $0xff06060505040403
947DATA expandAVX512_20_inShuf1<>+0x28(SB)/8, $0x0606050504040303
948DATA expandAVX512_20_inShuf1<>+0x30(SB)/8, $0xffffffff06050403
949DATA expandAVX512_20_inShuf1<>+0x38(SB)/8, $0xffff050504040303
950
// expandAVX512_20_mat1 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf1 gather (Z3). Every matrix byte has one bit set.
951GLOBL expandAVX512_20_mat1<>(SB), RODATA, $0x40
952DATA expandAVX512_20_mat1<>+0x00(SB)/8, $0x2020202020202020
953DATA expandAVX512_20_mat1<>+0x08(SB)/8, $0x4040404040404040
954DATA expandAVX512_20_mat1<>+0x10(SB)/8, $0x4040404080808080
955DATA expandAVX512_20_mat1<>+0x18(SB)/8, $0x8080808080808080
956DATA expandAVX512_20_mat1<>+0x20(SB)/8, $0x0202020202020202
957DATA expandAVX512_20_mat1<>+0x28(SB)/8, $0x0404040404040404
958DATA expandAVX512_20_mat1<>+0x30(SB)/8, $0x0404040408080808
959DATA expandAVX512_20_mat1<>+0x38(SB)/8, $0x0808080808080808
960
// expandAVX512_20_inShuf2 is the VPERMB index vector loaded into Z4.
// Lanes 0-5 gather from input bytes 3-5; lanes 6-7 gather from bytes 4-6.
961GLOBL expandAVX512_20_inShuf2<>(SB), RODATA, $0x40
962DATA expandAVX512_20_inShuf2<>+0x00(SB)/8, $0xffff050504040303
963DATA expandAVX512_20_inShuf2<>+0x08(SB)/8, $0xffffffffff050403
964DATA expandAVX512_20_inShuf2<>+0x10(SB)/8, $0xffff050504040303
965DATA expandAVX512_20_inShuf2<>+0x18(SB)/8, $0xffff050504040303
966DATA expandAVX512_20_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
967DATA expandAVX512_20_inShuf2<>+0x28(SB)/8, $0xffff050504040303
968DATA expandAVX512_20_inShuf2<>+0x30(SB)/8, $0xffff060605050404
969DATA expandAVX512_20_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
970
// expandAVX512_20_mat2 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf2 gather (Z4). Every matrix byte has one bit set.
971GLOBL expandAVX512_20_mat2<>(SB), RODATA, $0x40
972DATA expandAVX512_20_mat2<>+0x00(SB)/8, $0x1010101010101010
973DATA expandAVX512_20_mat2<>+0x08(SB)/8, $0x1010101020202020
974DATA expandAVX512_20_mat2<>+0x10(SB)/8, $0x2020202020202020
975DATA expandAVX512_20_mat2<>+0x18(SB)/8, $0x4040404040404040
976DATA expandAVX512_20_mat2<>+0x20(SB)/8, $0x4040404080808080
977DATA expandAVX512_20_mat2<>+0x28(SB)/8, $0x8080808080808080
978DATA expandAVX512_20_mat2<>+0x30(SB)/8, $0x0101010101010101
979DATA expandAVX512_20_mat2<>+0x38(SB)/8, $0x0101010102020202
980
// expandAVX512_20_outShufLo is the VPERMI2B index vector (loaded into Z1) that
// picks the low 64 result bytes out of the Z0/Z3 register pair.
// NOTE(review): unlike the inShuf/mat tables this symbol has no "<>" suffix.
981GLOBL expandAVX512_20_outShufLo(SB), RODATA, $0x40
982DATA expandAVX512_20_outShufLo+0x00(SB)/8, $0x2019181110080100
983DATA expandAVX512_20_outShufLo+0x08(SB)/8, $0x4841403831302928
984DATA expandAVX512_20_outShufLo+0x10(SB)/8, $0x1209030259585049
985DATA expandAVX512_20_outShufLo+0x18(SB)/8, $0x33322b2a211b1a13
986DATA expandAVX512_20_outShufLo+0x20(SB)/8, $0x5b5a514b4a434239
987DATA expandAVX512_20_outShufLo+0x28(SB)/8, $0x221d1c15140a0504
988DATA expandAVX512_20_outShufLo+0x30(SB)/8, $0x4c45443a35342d2c
989DATA expandAVX512_20_outShufLo+0x38(SB)/8, $0x160b07065d5c524d
990
// expandAVX512_20_outShufHi is the VPERMI2B index vector (loaded into Z2) that
// picks the high 64 result bytes out of the Z3/Z4 register pair. It has no
// 0xff entries, so expandAVX512_20 needs no mask or second shuffle.
991GLOBL expandAVX512_20_outShufHi(SB), RODATA, $0x40
992DATA expandAVX512_20_outShufHi+0x00(SB)/8, $0x4140393830292820
993DATA expandAVX512_20_outShufHi+0x08(SB)/8, $0x6968605958515048
994DATA expandAVX512_20_outShufHi+0x10(SB)/8, $0x312b2a2221787170
995DATA expandAVX512_20_outShufHi+0x18(SB)/8, $0x5a53524943423b3a
996DATA expandAVX512_20_outShufHi+0x20(SB)/8, $0x237973726b6a615b
997DATA expandAVX512_20_outShufHi+0x28(SB)/8, $0x45443d3c322d2c24
998DATA expandAVX512_20_outShufHi+0x30(SB)/8, $0x6d6c625d5c55544a
999DATA expandAVX512_20_outShufHi+0x38(SB)/8, $0x332f2e26257a7574
1000
// expandAVX512_20 loads 64 bytes from the address in AX and leaves two 64-byte
// results in Z1 (low half) and Z2 (high half). It has no stack frame and no
// Go-visible arguments ($0-0), so the register contract is implicit.
// Judging by the tables, each input bit is replicated 20 times in the output
// — TODO confirm against mkasm.go.
// Overwrites Z0-Z5; AX is only read and no mask register is used.
1001TEXT expandAVX512_20<>(SB), NOSPLIT, $0-0
	// Load the three input-gather index vectors and the two output shuffles.
1002 VMOVDQU64 expandAVX512_20_inShuf0<>(SB), Z0
1003 VMOVDQU64 expandAVX512_20_inShuf1<>(SB), Z3
1004 VMOVDQU64 expandAVX512_20_inShuf2<>(SB), Z4
1005 VMOVDQU64 expandAVX512_20_outShufLo(SB), Z1
1006 VMOVDQU64 expandAVX512_20_outShufHi(SB), Z2
	// Z5 = the 64 input bytes.
1007 VMOVDQU64 (AX), Z5
	// For each of Z0, Z3, Z4: gather input bytes with VPERMB, then spread
	// their bits with the matching per-lane bit matrices.
1008 VPERMB Z5, Z0, Z0
1009 VGF2P8AFFINEQB $0, expandAVX512_20_mat0<>(SB), Z0, Z0
1010 VPERMB Z5, Z3, Z3
1011 VGF2P8AFFINEQB $0, expandAVX512_20_mat1<>(SB), Z3, Z3
1012 VPERMB Z5, Z4, Z4
1013 VGF2P8AFFINEQB $0, expandAVX512_20_mat2<>(SB), Z4, Z4
	// Low half from the Z0/Z3 pair, high half from the Z3/Z4 pair; each index
	// register is overwritten with its result.
1014 VPERMI2B Z3, Z0, Z1
1015 VPERMI2B Z4, Z3, Z2
1016 RET
1017
// expandAVX512_22_inShuf0 is the VPERMB index vector that expandAVX512_22 loads
// into Z0. Every lane gathers from input bytes 0-2; 0xff entries are presumably
// don't-care positions.
1018GLOBL expandAVX512_22_inShuf0<>(SB), RODATA, $0x40
1019DATA expandAVX512_22_inShuf0<>+0x00(SB)/8, $0xffff020201010000
1020DATA expandAVX512_22_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
1021DATA expandAVX512_22_inShuf0<>+0x10(SB)/8, $0xffff020201010000
1022DATA expandAVX512_22_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
1023DATA expandAVX512_22_inShuf0<>+0x20(SB)/8, $0xffff020201010000
1024DATA expandAVX512_22_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
1025DATA expandAVX512_22_inShuf0<>+0x30(SB)/8, $0xffff020201010000
1026DATA expandAVX512_22_inShuf0<>+0x38(SB)/8, $0xffff020201010000
1027
// expandAVX512_22_mat0 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf0 gather (Z0). Every matrix byte has one bit set.
1028GLOBL expandAVX512_22_mat0<>(SB), RODATA, $0x40
1029DATA expandAVX512_22_mat0<>+0x00(SB)/8, $0x0101010101010101
1030DATA expandAVX512_22_mat0<>+0x08(SB)/8, $0x0101010101010202
1031DATA expandAVX512_22_mat0<>+0x10(SB)/8, $0x0202020202020202
1032DATA expandAVX512_22_mat0<>+0x18(SB)/8, $0x0202020204040404
1033DATA expandAVX512_22_mat0<>+0x20(SB)/8, $0x0404040404040404
1034DATA expandAVX512_22_mat0<>+0x28(SB)/8, $0x0404080808080808
1035DATA expandAVX512_22_mat0<>+0x30(SB)/8, $0x0808080808080808
1036DATA expandAVX512_22_mat0<>+0x38(SB)/8, $0x1010101010101010
1037
// expandAVX512_22_inShuf1 is the VPERMB index vector loaded into Z2.
// Lanes 0-5 gather from input bytes 0-2, lane 6 from bytes 2-4 and lane 7
// from bytes 3-5.
1038GLOBL expandAVX512_22_inShuf1<>(SB), RODATA, $0x40
1039DATA expandAVX512_22_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
1040DATA expandAVX512_22_inShuf1<>+0x08(SB)/8, $0xffff020201010000
1041DATA expandAVX512_22_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
1042DATA expandAVX512_22_inShuf1<>+0x18(SB)/8, $0xffff020201010000
1043DATA expandAVX512_22_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
1044DATA expandAVX512_22_inShuf1<>+0x28(SB)/8, $0xffffffff01010000
1045DATA expandAVX512_22_inShuf1<>+0x30(SB)/8, $0xffff040403030202
1046DATA expandAVX512_22_inShuf1<>+0x38(SB)/8, $0xffff050504040303
1047
// expandAVX512_22_mat1 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf1 gather (Z2). Every matrix byte has one bit set.
1048GLOBL expandAVX512_22_mat1<>(SB), RODATA, $0x40
1049DATA expandAVX512_22_mat1<>+0x00(SB)/8, $0x1010101010102020
1050DATA expandAVX512_22_mat1<>+0x08(SB)/8, $0x2020202020202020
1051DATA expandAVX512_22_mat1<>+0x10(SB)/8, $0x2020202040404040
1052DATA expandAVX512_22_mat1<>+0x18(SB)/8, $0x4040404040404040
1053DATA expandAVX512_22_mat1<>+0x20(SB)/8, $0x4040808080808080
1054DATA expandAVX512_22_mat1<>+0x28(SB)/8, $0x8080808080808080
1055DATA expandAVX512_22_mat1<>+0x30(SB)/8, $0x8080808080808080
1056DATA expandAVX512_22_mat1<>+0x38(SB)/8, $0x0101010101010101
1057
// expandAVX512_22_inShuf2 is the VPERMB index vector loaded into Z3.
// Every lane gathers from input bytes 3-5.
1058GLOBL expandAVX512_22_inShuf2<>(SB), RODATA, $0x40
1059DATA expandAVX512_22_inShuf2<>+0x00(SB)/8, $0xffffffffff050403
1060DATA expandAVX512_22_inShuf2<>+0x08(SB)/8, $0xffff050504040303
1061DATA expandAVX512_22_inShuf2<>+0x10(SB)/8, $0xffffffffff050403
1062DATA expandAVX512_22_inShuf2<>+0x18(SB)/8, $0xffff050504040303
1063DATA expandAVX512_22_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
1064DATA expandAVX512_22_inShuf2<>+0x28(SB)/8, $0xffff050504040303
1065DATA expandAVX512_22_inShuf2<>+0x30(SB)/8, $0xffff050504040303
1066DATA expandAVX512_22_inShuf2<>+0x38(SB)/8, $0xffffffffff050403
1067
// expandAVX512_22_mat2 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf2 gather (Z3). Every matrix byte has one bit set.
1068GLOBL expandAVX512_22_mat2<>(SB), RODATA, $0x40
1069DATA expandAVX512_22_mat2<>+0x00(SB)/8, $0x0101010101010202
1070DATA expandAVX512_22_mat2<>+0x08(SB)/8, $0x0202020202020202
1071DATA expandAVX512_22_mat2<>+0x10(SB)/8, $0x0202020204040404
1072DATA expandAVX512_22_mat2<>+0x18(SB)/8, $0x0404040404040404
1073DATA expandAVX512_22_mat2<>+0x20(SB)/8, $0x0404080808080808
1074DATA expandAVX512_22_mat2<>+0x28(SB)/8, $0x0808080808080808
1075DATA expandAVX512_22_mat2<>+0x30(SB)/8, $0x1010101010101010
1076DATA expandAVX512_22_mat2<>+0x38(SB)/8, $0x1010101010102020
1077
// expandAVX512_22_inShuf3 is the VPERMB index vector loaded into Z4.
// Only lanes 0-3 carry real indices (input bytes 3-5); lanes 4-7 are all 0xff
// and are paired with all-zero matrices in expandAVX512_22_mat3.
1078GLOBL expandAVX512_22_inShuf3<>(SB), RODATA, $0x40
1079DATA expandAVX512_22_inShuf3<>+0x00(SB)/8, $0xffff050504040303
1080DATA expandAVX512_22_inShuf3<>+0x08(SB)/8, $0xffffffffff050403
1081DATA expandAVX512_22_inShuf3<>+0x10(SB)/8, $0xffffff0504040303
1082DATA expandAVX512_22_inShuf3<>+0x18(SB)/8, $0xffffffffffff0403
1083DATA expandAVX512_22_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
1084DATA expandAVX512_22_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
1085DATA expandAVX512_22_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1086DATA expandAVX512_22_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1087
// expandAVX512_22_mat3 holds the per-lane bit matrices that VGF2P8AFFINEQB
// applies to the inShuf3 gather (Z4). Lanes 4-7 are zero matrices, so with the
// $0 immediate those lanes of the result are zero.
1088GLOBL expandAVX512_22_mat3<>(SB), RODATA, $0x40
1089DATA expandAVX512_22_mat3<>+0x00(SB)/8, $0x2020202020202020
1090DATA expandAVX512_22_mat3<>+0x08(SB)/8, $0x2020202040404040
1091DATA expandAVX512_22_mat3<>+0x10(SB)/8, $0x4040404040404040
1092DATA expandAVX512_22_mat3<>+0x18(SB)/8, $0x4040808080808080
1093DATA expandAVX512_22_mat3<>+0x20(SB)/8, $0x0000000000000000
1094DATA expandAVX512_22_mat3<>+0x28(SB)/8, $0x0000000000000000
1095DATA expandAVX512_22_mat3<>+0x30(SB)/8, $0x0000000000000000
1096DATA expandAVX512_22_mat3<>+0x38(SB)/8, $0x0000000000000000
1097
// expandAVX512_22_outShufLo is the VPERMI2B index vector (loaded into Z1) that
// picks the low 64 result bytes out of the Z0/Z2 register pair.
// NOTE(review): unlike the inShuf/mat tables this symbol has no "<>" suffix.
1098GLOBL expandAVX512_22_outShufLo(SB), RODATA, $0x40
1099DATA expandAVX512_22_outShufLo+0x00(SB)/8, $0x2120181110080100
1100DATA expandAVX512_22_outShufLo+0x08(SB)/8, $0x4948403938313028
1101DATA expandAVX512_22_outShufLo+0x10(SB)/8, $0x0302696860595850
1102DATA expandAVX512_22_outShufLo+0x18(SB)/8, $0x3229232219131209
1103DATA expandAVX512_22_outShufLo+0x20(SB)/8, $0x5a514b4a413b3a33
1104DATA expandAVX512_22_outShufLo+0x28(SB)/8, $0x140a05046b6a615b
1105DATA expandAVX512_22_outShufLo+0x30(SB)/8, $0x3c35342a25241a15
1106DATA expandAVX512_22_outShufLo+0x38(SB)/8, $0x625d5c524d4c423d
1107
// expandAVX512_22_outShufHi0 is the VPERMI2B index vector (loaded into Z5) that
// picks high-half result bytes out of the Z2/Z3 pair. Positions holding 0xff
// are masked off by K1 in expandAVX512_22 and filled from outShufHi1 instead.
1108GLOBL expandAVX512_22_outShufHi0(SB), RODATA, $0x40
1109DATA expandAVX512_22_outShufHi0+0x00(SB)/8, $0x5049484039383130
1110DATA expandAVX512_22_outShufHi0+0x08(SB)/8, $0x7871706968605958
1111DATA expandAVX512_22_outShufHi0+0x10(SB)/8, $0x3332ffffffffffff
1112DATA expandAVX512_22_outShufHi0+0x18(SB)/8, $0x5b5a514b4a413b3a
1113DATA expandAVX512_22_outShufHi0+0x20(SB)/8, $0xffff7973726b6a61
1114DATA expandAVX512_22_outShufHi0+0x28(SB)/8, $0x3d3c3534ffffffff
1115DATA expandAVX512_22_outShufHi0+0x30(SB)/8, $0x6c625d5c524d4c42
1116DATA expandAVX512_22_outShufHi0+0x38(SB)/8, $0xffffffff7a75746d
1117
// expandAVX512_22_outShufHi1 is the VPERMB index vector (loaded into Z6) that
// picks the remaining high-half result bytes out of Z4. Its non-0xff positions
// are exactly the positions where outShufHi0 holds 0xff.
1118GLOBL expandAVX512_22_outShufHi1(SB), RODATA, $0x40
1119DATA expandAVX512_22_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1120DATA expandAVX512_22_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1121DATA expandAVX512_22_outShufHi1+0x10(SB)/8, $0xffff181110080100
1122DATA expandAVX512_22_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
1123DATA expandAVX512_22_outShufHi1+0x20(SB)/8, $0x0302ffffffffffff
1124DATA expandAVX512_22_outShufHi1+0x28(SB)/8, $0xffffffff19131209
1125DATA expandAVX512_22_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
1126DATA expandAVX512_22_outShufHi1+0x38(SB)/8, $0x140a0504ffffffff
1127
// expandAVX512_22 loads 64 bytes from the address in AX and leaves two 64-byte
// results in Z1 (low half) and Z2 (high half). It has no stack frame and no
// Go-visible arguments ($0-0), so the register contract is implicit.
// Judging by the tables, each input bit is replicated 22 times in the output
// — TODO confirm against mkasm.go.
// Overwrites AX, K1 and Z0-Z7.
1128TEXT expandAVX512_22<>(SB), NOSPLIT, $0-0
	// Load the four input-gather index vectors and the three output shuffles.
1129 VMOVDQU64 expandAVX512_22_inShuf0<>(SB), Z0
1130 VMOVDQU64 expandAVX512_22_inShuf1<>(SB), Z2
1131 VMOVDQU64 expandAVX512_22_inShuf2<>(SB), Z3
1132 VMOVDQU64 expandAVX512_22_inShuf3<>(SB), Z4
1133 VMOVDQU64 expandAVX512_22_outShufLo(SB), Z1
1134 VMOVDQU64 expandAVX512_22_outShufHi0(SB), Z5
1135 VMOVDQU64 expandAVX512_22_outShufHi1(SB), Z6
	// Z7 = the 64 input bytes.
1136 VMOVDQU64 (AX), Z7
	// For each of Z0, Z2, Z3, Z4: gather input bytes with VPERMB, then spread
	// their bits with the matching per-lane bit matrices.
1137 VPERMB Z7, Z0, Z0
1138 VGF2P8AFFINEQB $0, expandAVX512_22_mat0<>(SB), Z0, Z0
1139 VPERMB Z7, Z2, Z2
1140 VGF2P8AFFINEQB $0, expandAVX512_22_mat1<>(SB), Z2, Z2
1141 VPERMB Z7, Z3, Z3
1142 VGF2P8AFFINEQB $0, expandAVX512_22_mat2<>(SB), Z3, Z3
1143 VPERMB Z7, Z4, Z4
1144 VGF2P8AFFINEQB $0, expandAVX512_22_mat3<>(SB), Z4, Z4
	// Low half: Z1 (outShufLo indices) is replaced by bytes picked from Z0/Z2.
1145 VPERMI2B Z2, Z0, Z1
	// High half, part 1: K1 has a bit set for every byte whose outShufHi0 index
	// is not 0xff; .Z zeroes the other bytes of Z5. AX is reused as scratch.
1146 MOVQ $0xffff03fffc0ffff, AX
1147 KMOVQ AX, K1
1148 VPERMI2B.Z Z3, Z2, K1, Z5
	// High half, part 2: the complementary mask selects the bytes taken from Z4
	// through outShufHi1; the result lands in Z0, zero elsewhere.
1149 MOVQ $0xf0000fc0003f0000, AX
1150 KMOVQ AX, K1
1151 VPERMB.Z Z4, Z6, K1, Z0
	// The two parts occupy disjoint bytes, so OR merges them into Z2.
1152 VPORQ Z0, Z5, Z2
1153 RET
1154
// expandAVX512_24_inShuf0 is a 64-byte read-only table whose lanes reference
// input bytes 0-2, most of them three times in a row.
// NOTE(review): presumably a VPERMB index vector like the sibling expanders'
// inShuf tables — confirm in expandAVX512_24.
1155GLOBL expandAVX512_24_inShuf0<>(SB), RODATA, $0x40
1156DATA expandAVX512_24_inShuf0<>+0x00(SB)/8, $0x0202010101000000
1157DATA expandAVX512_24_inShuf0<>+0x08(SB)/8, $0x0202010101000000
1158DATA expandAVX512_24_inShuf0<>+0x10(SB)/8, $0x0202010101000000
1159DATA expandAVX512_24_inShuf0<>+0x18(SB)/8, $0x0202010101000000
1160DATA expandAVX512_24_inShuf0<>+0x20(SB)/8, $0x0202010101000000
1161DATA expandAVX512_24_inShuf0<>+0x28(SB)/8, $0xff02010101000000
1162DATA expandAVX512_24_inShuf0<>+0x30(SB)/8, $0xffff010101000000
1163DATA expandAVX512_24_inShuf0<>+0x38(SB)/8, $0xffff010101000000
1164
// expandAVX512_24_mat0 is a 64-byte read-only table; lane k is the byte (1<<k)
// repeated eight times.
// NOTE(review): presumably the VGF2P8AFFINEQB matrices for the inShuf0 gather,
// and possibly shared with inShuf1 since no mat1 table is defined here —
// confirm in expandAVX512_24.
1165GLOBL expandAVX512_24_mat0<>(SB), RODATA, $0x40
1166DATA expandAVX512_24_mat0<>+0x00(SB)/8, $0x0101010101010101
1167DATA expandAVX512_24_mat0<>+0x08(SB)/8, $0x0202020202020202
1168DATA expandAVX512_24_mat0<>+0x10(SB)/8, $0x0404040404040404
1169DATA expandAVX512_24_mat0<>+0x18(SB)/8, $0x0808080808080808
1170DATA expandAVX512_24_mat0<>+0x20(SB)/8, $0x1010101010101010
1171DATA expandAVX512_24_mat0<>+0x28(SB)/8, $0x2020202020202020
1172DATA expandAVX512_24_mat0<>+0x30(SB)/8, $0x4040404040404040
1173DATA expandAVX512_24_mat0<>+0x38(SB)/8, $0x8080808080808080
1174
// expandAVX512_24_inShuf1 is a 64-byte read-only table: lanes 0-4 reference
// only input byte 2, lanes 5-7 reference input bytes 2-4.
// NOTE(review): presumably a VPERMB index vector — confirm in expandAVX512_24.
1175GLOBL expandAVX512_24_inShuf1<>(SB), RODATA, $0x40
1176DATA expandAVX512_24_inShuf1<>+0x00(SB)/8, $0xffffffffffffff02
1177DATA expandAVX512_24_inShuf1<>+0x08(SB)/8, $0xffffffffffffff02
1178DATA expandAVX512_24_inShuf1<>+0x10(SB)/8, $0xffffffffffffff02
1179DATA expandAVX512_24_inShuf1<>+0x18(SB)/8, $0xffffffffffffff02
1180DATA expandAVX512_24_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
1181DATA expandAVX512_24_inShuf1<>+0x28(SB)/8, $0x0404040303030202
1182DATA expandAVX512_24_inShuf1<>+0x30(SB)/8, $0x0404030303020202
1183DATA expandAVX512_24_inShuf1<>+0x38(SB)/8, $0x0404030303020202
1184
// expandAVX512_24_inShuf2 is a 64-byte read-only table: lanes 0-4 reference
// input bytes 3-5, lanes 5-7 reference a single byte (4, 4 and 5).
// NOTE(review): presumably a VPERMB index vector — confirm in expandAVX512_24.
1185GLOBL expandAVX512_24_inShuf2<>(SB), RODATA, $0x40
1186DATA expandAVX512_24_inShuf2<>+0x00(SB)/8, $0x0505040404030303
1187DATA expandAVX512_24_inShuf2<>+0x08(SB)/8, $0x0505040404030303
1188DATA expandAVX512_24_inShuf2<>+0x10(SB)/8, $0x0505040404030303
1189DATA expandAVX512_24_inShuf2<>+0x18(SB)/8, $0xffff040404030303
1190DATA expandAVX512_24_inShuf2<>+0x20(SB)/8, $0xffff040404030303
1191DATA expandAVX512_24_inShuf2<>+0x28(SB)/8, $0xffffffffffffff04
1192DATA expandAVX512_24_inShuf2<>+0x30(SB)/8, $0xffffffffffffff04
1193DATA expandAVX512_24_inShuf2<>+0x38(SB)/8, $0xffffffffffffff05
1194
// expandAVX512_24_mat2 is a 64-byte read-only table of eight 8-byte words, each
// a single-bit byte repeated eight times. The sequence skips 0x20 and ends
// with 0x01.
// NOTE(review): presumably the VGF2P8AFFINEQB matrices for the inShuf2 gather
// — confirm in expandAVX512_24.
1195GLOBL expandAVX512_24_mat2<>(SB), RODATA, $0x40
1196DATA expandAVX512_24_mat2<>+0x00(SB)/8, $0x0101010101010101
1197DATA expandAVX512_24_mat2<>+0x08(SB)/8, $0x0202020202020202
1198DATA expandAVX512_24_mat2<>+0x10(SB)/8, $0x0404040404040404
1199DATA expandAVX512_24_mat2<>+0x18(SB)/8, $0x0808080808080808
1200DATA expandAVX512_24_mat2<>+0x20(SB)/8, $0x1010101010101010
1201DATA expandAVX512_24_mat2<>+0x28(SB)/8, $0x4040404040404040
1202DATA expandAVX512_24_mat2<>+0x30(SB)/8, $0x8080808080808080
1203DATA expandAVX512_24_mat2<>+0x38(SB)/8, $0x0101010101010101
1204
// expandAVX512_24_inShuf3 is a 64-byte read-only table whose only non-0xff
// entry is the first byte (input byte 5).
// NOTE(review): presumably a VPERMB index vector — confirm in expandAVX512_24.
1205GLOBL expandAVX512_24_inShuf3<>(SB), RODATA, $0x40
1206DATA expandAVX512_24_inShuf3<>+0x00(SB)/8, $0xffffffffffffff05
1207DATA expandAVX512_24_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
1208DATA expandAVX512_24_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
1209DATA expandAVX512_24_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
1210DATA expandAVX512_24_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
1211DATA expandAVX512_24_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
1212DATA expandAVX512_24_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1213DATA expandAVX512_24_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1214
// expandAVX512_24_mat3 is a 64-byte read-only table; only lane 0 is non-zero
// (0x02 repeated eight times).
// NOTE(review): presumably the VGF2P8AFFINEQB matrices for the inShuf3 gather
// — confirm in expandAVX512_24.
1215GLOBL expandAVX512_24_mat3<>(SB), RODATA, $0x40
1216DATA expandAVX512_24_mat3<>+0x00(SB)/8, $0x0202020202020202
1217DATA expandAVX512_24_mat3<>+0x08(SB)/8, $0x0000000000000000
1218DATA expandAVX512_24_mat3<>+0x10(SB)/8, $0x0000000000000000
1219DATA expandAVX512_24_mat3<>+0x18(SB)/8, $0x0000000000000000
1220DATA expandAVX512_24_mat3<>+0x20(SB)/8, $0x0000000000000000
1221DATA expandAVX512_24_mat3<>+0x28(SB)/8, $0x0000000000000000
1222DATA expandAVX512_24_mat3<>+0x30(SB)/8, $0x0000000000000000
1223DATA expandAVX512_24_mat3<>+0x38(SB)/8, $0x0000000000000000
1224
// expandAVX512_24_outShufLo is the index vector for the unmasked VPERMI2B that
// builds Z1 in expandAVX512_24. Indices 0x00-0x3f select bytes of the first
// affine result (Z0); indices 0x40-0x7f select bytes of the second (Z2).
// Unlike the inShuf/mat tables, this symbol is declared without the <> suffix.
1225GLOBL expandAVX512_24_outShufLo(SB), RODATA, $0x40
1226DATA expandAVX512_24_outShufLo+0x00(SB)/8, $0x11100a0908020100
1227DATA expandAVX512_24_outShufLo+0x08(SB)/8, $0x282221201a191812
1228DATA expandAVX512_24_outShufLo+0x10(SB)/8, $0x3a39383231302a29
1229DATA expandAVX512_24_outShufLo+0x18(SB)/8, $0x14130d0c0b050403
1230DATA expandAVX512_24_outShufLo+0x20(SB)/8, $0x2b2524231d1c1b15
1231DATA expandAVX512_24_outShufLo+0x28(SB)/8, $0x3d3c3b3534332d2c
1232DATA expandAVX512_24_outShufLo+0x30(SB)/8, $0x1716480f0e400706
1233DATA expandAVX512_24_outShufLo+0x38(SB)/8, $0x2e602726581f1e50
1234
// expandAVX512_24_outShufHi0 is the index vector for the K1-masked VPERMI2B.Z in
// expandAVX512_24 (indices 0x00-0x3f select from Z2, 0x40-0x7f from Z3).
// Its single 0xff entry is lane 0x3d, the one lane cleared by the mask
// 0xdfffffffffffffff; that lane is supplied through outShufHi1 instead.
1235GLOBL expandAVX512_24_outShufHi0(SB), RODATA, $0x40
1236DATA expandAVX512_24_outShufHi0+0x00(SB)/8, $0x3a39383231302928
1237DATA expandAVX512_24_outShufHi0+0x08(SB)/8, $0x51504a4948424140
1238DATA expandAVX512_24_outShufHi0+0x10(SB)/8, $0x2a6261605a595852
1239DATA expandAVX512_24_outShufHi0+0x18(SB)/8, $0x3d3c3b3534332c2b
1240DATA expandAVX512_24_outShufHi0+0x20(SB)/8, $0x54534d4c4b454443
1241DATA expandAVX512_24_outShufHi0+0x28(SB)/8, $0x2d6564635d5c5b55
1242DATA expandAVX512_24_outShufHi0+0x30(SB)/8, $0x703f3e6837362f2e
1243DATA expandAVX512_24_outShufHi0+0x38(SB)/8, $0x5756ff4f4e784746
1244
// expandAVX512_24_outShufHi1 is the index vector for the K1-masked VPERMB.Z over
// the fourth affine result (Z4) in expandAVX512_24. Only lane 0x3d carries an
// index (0x00); it is the single lane enabled by mask 0x2000000000000000.
1245GLOBL expandAVX512_24_outShufHi1(SB), RODATA, $0x40
1246DATA expandAVX512_24_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1247DATA expandAVX512_24_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1248DATA expandAVX512_24_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
1249DATA expandAVX512_24_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
1250DATA expandAVX512_24_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1251DATA expandAVX512_24_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
1252DATA expandAVX512_24_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
1253DATA expandAVX512_24_outShufHi1+0x38(SB)/8, $0xffff00ffffffffff
1254
// expandAVX512_24 loads 64 bytes from the address in AX, runs four
// VPERMB + VGF2P8AFFINEQB passes over them, and shuffles the pass results into
// two 64-byte vectors left in Z1 and Z2 (presumably the low and high halves of
// the expanded output, going by the outShufLo/outShufHi* table names).
// Overwrites Z0-Z8, AX and K1. Declared NOSPLIT with frame $0-0.
1255TEXT expandAVX512_24<>(SB), NOSPLIT, $0-0
1256 VMOVDQU64 expandAVX512_24_inShuf0<>(SB), Z0
1257 VMOVDQU64 expandAVX512_24_mat0<>(SB), Z2 // mat0 is kept in a register: passes 0 and 1 both use it
1258 VMOVDQU64 expandAVX512_24_inShuf1<>(SB), Z3
1259 VMOVDQU64 expandAVX512_24_inShuf2<>(SB), Z4
1260 VMOVDQU64 expandAVX512_24_inShuf3<>(SB), Z5
1261 VMOVDQU64 expandAVX512_24_outShufLo(SB), Z1
1262 VMOVDQU64 expandAVX512_24_outShufHi0(SB), Z6
1263 VMOVDQU64 expandAVX512_24_outShufHi1(SB), Z7
1264 VMOVDQU64 (AX), Z8 // Z8 = 64 input bytes
1265 VPERMB Z8, Z0, Z0 // pass 0: gather input bytes by inShuf0
1266 VGF2P8AFFINEQB $0, Z2, Z0, Z0 // Z0 = pass 0 result
1267 VPERMB Z8, Z3, Z3 // pass 1: gather by inShuf1
1268 VGF2P8AFFINEQB $0, Z2, Z3, Z2 // Z2 = pass 1 result (overwrites the mat0 copy, no longer needed)
1269 VPERMB Z8, Z4, Z3 // pass 2: gather by inShuf2
1270 VGF2P8AFFINEQB $0, expandAVX512_24_mat2<>(SB), Z3, Z3 // Z3 = pass 2 result
1271 VPERMB Z8, Z5, Z4 // pass 3: gather by inShuf3
1272 VGF2P8AFFINEQB $0, expandAVX512_24_mat3<>(SB), Z4, Z4 // Z4 = pass 3 result
1273 VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from Z0/Z2 by outShufLo
1274 MOVQ $0xdfffffffffffffff, AX // lanes taken from Z2/Z3; AX no longer needed as the input pointer
1275 KMOVQ AX, K1
1276 VPERMI2B.Z Z3, Z2, K1, Z6 // Z6 = bytes picked from Z2/Z3 by outShufHi0, other lanes zeroed
1277 MOVQ $0x2000000000000000, AX // complement of the mask above: lanes taken from Z4
1278 KMOVQ AX, K1
1279 VPERMB.Z Z4, Z7, K1, Z0 // Z0 = bytes picked from Z4 by outShufHi1, other lanes zeroed
1280 VPORQ Z0, Z6, Z2 // Z2 = merge of the two masked shuffles
1281 RET
1282
// expandAVX512_26_inShuf0 is the VPERMB index vector for the first gather in
// expandAVX512_26: each byte names the input byte copied into that lane before
// the mat0 affine step. Low byte first, qword 0 reads input bytes 0,0,0,1,1,1,2,2.
// NOTE(review): 0xff entries look like filler for lanes no outShuf table reads
// -- confirm against mkasm.go.
1283GLOBL expandAVX512_26_inShuf0<>(SB), RODATA, $0x40
1284DATA expandAVX512_26_inShuf0<>+0x00(SB)/8, $0x0202010101000000
1285DATA expandAVX512_26_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
1286DATA expandAVX512_26_inShuf0<>+0x10(SB)/8, $0xffff020201010000
1287DATA expandAVX512_26_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
1288DATA expandAVX512_26_inShuf0<>+0x20(SB)/8, $0xffff020201010000
1289DATA expandAVX512_26_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
1290DATA expandAVX512_26_inShuf0<>+0x30(SB)/8, $0x0202010101000000
1291DATA expandAVX512_26_inShuf0<>+0x38(SB)/8, $0xffff010101000000
1292
// expandAVX512_26_mat0 holds the per-qword 8x8 GF(2) bit-matrices for the first
// VGF2P8AFFINEQB in expandAVX512_26.
// A qword of one repeated byte 1<<k turns each source byte into 0x00/0xff
// according to its bit k. A qword mixing two values builds each result byte from
// two input bits: e.g. 0x0101020202020202 takes the low 2 result bits from input
// bit 0 and the upper 6 from input bit 1.
1293GLOBL expandAVX512_26_mat0<>(SB), RODATA, $0x40
1294DATA expandAVX512_26_mat0<>+0x00(SB)/8, $0x0101010101010101
1295DATA expandAVX512_26_mat0<>+0x08(SB)/8, $0x0101020202020202
1296DATA expandAVX512_26_mat0<>+0x10(SB)/8, $0x0202020202020202
1297DATA expandAVX512_26_mat0<>+0x18(SB)/8, $0x0202020204040404
1298DATA expandAVX512_26_mat0<>+0x20(SB)/8, $0x0404040404040404
1299DATA expandAVX512_26_mat0<>+0x28(SB)/8, $0x0404040404040808
1300DATA expandAVX512_26_mat0<>+0x30(SB)/8, $0x0808080808080808
1301DATA expandAVX512_26_mat0<>+0x38(SB)/8, $0x1010101010101010
1302
// expandAVX512_26_inShuf1 is the VPERMB index vector for the second gather in
// expandAVX512_26. Qwords 0-5 read input bytes 0 and 1 only; qword 6 reads
// byte 2 once; qword 7 reads bytes 2,3,3,3,4,4,4 (low byte first).
1303GLOBL expandAVX512_26_inShuf1<>(SB), RODATA, $0x40
1304DATA expandAVX512_26_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
1305DATA expandAVX512_26_inShuf1<>+0x08(SB)/8, $0xffffffff01010000
1306DATA expandAVX512_26_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
1307DATA expandAVX512_26_inShuf1<>+0x18(SB)/8, $0xffffffff01010000
1308DATA expandAVX512_26_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
1309DATA expandAVX512_26_inShuf1<>+0x28(SB)/8, $0xffff010101000000
1310DATA expandAVX512_26_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
1311DATA expandAVX512_26_inShuf1<>+0x38(SB)/8, $0xff04040403030302
1312
// expandAVX512_26_mat1 holds the bit-matrices for the second VGF2P8AFFINEQB in
// expandAVX512_26. Qwords 0-5 continue through input bits 4..7 (pure and
// two-bit mixed rows); qwords 6 and 7 select bit 0 (0x01) and bit 3 (0x08) for
// the bytes that inShuf1 qwords 6 and 7 gather.
1313GLOBL expandAVX512_26_mat1<>(SB), RODATA, $0x40
1314DATA expandAVX512_26_mat1<>+0x00(SB)/8, $0x1010202020202020
1315DATA expandAVX512_26_mat1<>+0x08(SB)/8, $0x2020202020202020
1316DATA expandAVX512_26_mat1<>+0x10(SB)/8, $0x2020202040404040
1317DATA expandAVX512_26_mat1<>+0x18(SB)/8, $0x4040404040404040
1318DATA expandAVX512_26_mat1<>+0x20(SB)/8, $0x4040404040408080
1319DATA expandAVX512_26_mat1<>+0x28(SB)/8, $0x8080808080808080
1320DATA expandAVX512_26_mat1<>+0x30(SB)/8, $0x0101010101010101
1321DATA expandAVX512_26_mat1<>+0x38(SB)/8, $0x0808080808080808
1322
// expandAVX512_26_inShuf2 is the VPERMB index vector for the third gather in
// expandAVX512_26; it reads input bytes 2, 3 and 4 (0xff lanes aside).
1323GLOBL expandAVX512_26_inShuf2<>(SB), RODATA, $0x40
1324DATA expandAVX512_26_inShuf2<>+0x00(SB)/8, $0x0404030303020202
1325DATA expandAVX512_26_inShuf2<>+0x08(SB)/8, $0xffffffffff040302
1326DATA expandAVX512_26_inShuf2<>+0x10(SB)/8, $0xffff040403030202
1327DATA expandAVX512_26_inShuf2<>+0x18(SB)/8, $0xffffffffff040302
1328DATA expandAVX512_26_inShuf2<>+0x20(SB)/8, $0xffff040403030202
1329DATA expandAVX512_26_inShuf2<>+0x28(SB)/8, $0xffffffffff040302
1330DATA expandAVX512_26_inShuf2<>+0x30(SB)/8, $0xff04030303020202
1331DATA expandAVX512_26_inShuf2<>+0x38(SB)/8, $0xffff040404030303
1332
// expandAVX512_26_mat2 holds the bit-matrices for the third VGF2P8AFFINEQB in
// expandAVX512_26: rows for input bits 4..7 (pure and two-bit mixed), then a
// bit-0 row (0x01) in qword 7.
1333GLOBL expandAVX512_26_mat2<>(SB), RODATA, $0x40
1334DATA expandAVX512_26_mat2<>+0x00(SB)/8, $0x1010101010101010
1335DATA expandAVX512_26_mat2<>+0x08(SB)/8, $0x1010202020202020
1336DATA expandAVX512_26_mat2<>+0x10(SB)/8, $0x2020202020202020
1337DATA expandAVX512_26_mat2<>+0x18(SB)/8, $0x2020202040404040
1338DATA expandAVX512_26_mat2<>+0x20(SB)/8, $0x4040404040404040
1339DATA expandAVX512_26_mat2<>+0x28(SB)/8, $0x4040404040408080
1340DATA expandAVX512_26_mat2<>+0x30(SB)/8, $0x8080808080808080
1341DATA expandAVX512_26_mat2<>+0x38(SB)/8, $0x0101010101010101
1342
// expandAVX512_26_inShuf3 is the VPERMB index vector for the fourth gather in
// expandAVX512_26; it reads input bytes 3 and 4. Qwords 6 and 7 are entirely
// 0xff and pair with the all-zero qwords of expandAVX512_26_mat3.
1343GLOBL expandAVX512_26_inShuf3<>(SB), RODATA, $0x40
1344DATA expandAVX512_26_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
1345DATA expandAVX512_26_inShuf3<>+0x08(SB)/8, $0xffffffff04040303
1346DATA expandAVX512_26_inShuf3<>+0x10(SB)/8, $0xffffffffffff0403
1347DATA expandAVX512_26_inShuf3<>+0x18(SB)/8, $0xffffffff04040303
1348DATA expandAVX512_26_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
1349DATA expandAVX512_26_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
1350DATA expandAVX512_26_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1351DATA expandAVX512_26_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1352
// expandAVX512_26_mat3 holds the bit-matrices for the fourth VGF2P8AFFINEQB in
// expandAVX512_26: rows for input bits 0..3 in qwords 0-4, a bit-4 row (0x10)
// in qword 5, and zero matrices (zero result bytes) in qwords 6 and 7.
1353GLOBL expandAVX512_26_mat3<>(SB), RODATA, $0x40
1354DATA expandAVX512_26_mat3<>+0x00(SB)/8, $0x0101020202020202
1355DATA expandAVX512_26_mat3<>+0x08(SB)/8, $0x0202020202020202
1356DATA expandAVX512_26_mat3<>+0x10(SB)/8, $0x0202020204040404
1357DATA expandAVX512_26_mat3<>+0x18(SB)/8, $0x0404040404040404
1358DATA expandAVX512_26_mat3<>+0x20(SB)/8, $0x0404040404040808
1359DATA expandAVX512_26_mat3<>+0x28(SB)/8, $0x1010101010101010
1360DATA expandAVX512_26_mat3<>+0x30(SB)/8, $0x0000000000000000
1361DATA expandAVX512_26_mat3<>+0x38(SB)/8, $0x0000000000000000
1362
// expandAVX512_26_outShufLo is the index vector for the unmasked VPERMI2B that
// builds Z1 in expandAVX512_26. Indices 0x00-0x3f select bytes of the pass-0
// result (Z0); indices 0x40-0x7f select bytes of the pass-1 result (Z2).
1363GLOBL expandAVX512_26_outShufLo(SB), RODATA, $0x40
1364DATA expandAVX512_26_outShufLo+0x00(SB)/8, $0x2018111008020100
1365DATA expandAVX512_26_outShufLo+0x08(SB)/8, $0x3a39383231302821
1366DATA expandAVX512_26_outShufLo+0x10(SB)/8, $0x6860595850494840
1367DATA expandAVX512_26_outShufLo+0x18(SB)/8, $0x1312090504036a69
1368DATA expandAVX512_26_outShufLo+0x20(SB)/8, $0x3b35343329232219
1369DATA expandAVX512_26_outShufLo+0x28(SB)/8, $0x5b5a514b4a413d3c
1370DATA expandAVX512_26_outShufLo+0x30(SB)/8, $0x0a7007066d6c6b61
1371DATA expandAVX512_26_outShufLo+0x38(SB)/8, $0x37362a25241a1514
1372
// expandAVX512_26_outShufHi0 is the index vector for the K1-masked VPERMI2B.Z in
// expandAVX512_26 (indices 0x00-0x3f select from Z2, 0x40-0x7f from Z3).
// Its 0xff entries sit exactly in the lanes cleared by mask 0xff7c07ffff01ffff;
// those lanes are supplied through outShufHi1 instead.
1373GLOBL expandAVX512_26_outShufHi0(SB), RODATA, $0x40
1374DATA expandAVX512_26_outShufHi0+0x00(SB)/8, $0x5851504842414038
1375DATA expandAVX512_26_outShufHi0+0x08(SB)/8, $0x7978727170686160
1376DATA expandAVX512_26_outShufHi0+0x10(SB)/8, $0xffffffffffffff7a
1377DATA expandAVX512_26_outShufHi0+0x18(SB)/8, $0x52494544433b3a39
1378DATA expandAVX512_26_outShufHi0+0x20(SB)/8, $0x7574736963625953
1379DATA expandAVX512_26_outShufHi0+0x28(SB)/8, $0xffffffffff7d7c7b
1380DATA expandAVX512_26_outShufHi0+0x30(SB)/8, $0xff47463e3d3cffff
1381DATA expandAVX512_26_outShufHi0+0x38(SB)/8, $0x766a65645a55544a
1382
// expandAVX512_26_outShufHi1 is the index vector for the K1-masked VPERMB.Z over
// the pass-3 result (Z4) in expandAVX512_26. Real indices appear only in the
// lanes enabled by mask 0x83f80000fe0000; every other lane is 0xff.
1383GLOBL expandAVX512_26_outShufHi1(SB), RODATA, $0x40
1384DATA expandAVX512_26_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1385DATA expandAVX512_26_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1386DATA expandAVX512_26_outShufHi1+0x10(SB)/8, $0x20191810090800ff
1387DATA expandAVX512_26_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
1388DATA expandAVX512_26_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1389DATA expandAVX512_26_outShufHi1+0x28(SB)/8, $0x1a110b0a01ffffff
1390DATA expandAVX512_26_outShufHi1+0x30(SB)/8, $0x28ffffffffff211b
1391DATA expandAVX512_26_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
1392
// expandAVX512_26 loads 64 bytes from the address in AX, runs four
// VPERMB + VGF2P8AFFINEQB passes over them (tables inShuf0-3 / mat0-3), and
// shuffles the pass results into two 64-byte vectors left in Z1 and Z2
// (presumably the low and high halves of the expanded output, going by the
// outShufLo/outShufHi* table names).
// Overwrites Z0-Z7, AX and K1. Declared NOSPLIT with frame $0-0.
1393TEXT expandAVX512_26<>(SB), NOSPLIT, $0-0
1394 VMOVDQU64 expandAVX512_26_inShuf0<>(SB), Z0
1395 VMOVDQU64 expandAVX512_26_inShuf1<>(SB), Z2
1396 VMOVDQU64 expandAVX512_26_inShuf2<>(SB), Z3
1397 VMOVDQU64 expandAVX512_26_inShuf3<>(SB), Z4
1398 VMOVDQU64 expandAVX512_26_outShufLo(SB), Z1
1399 VMOVDQU64 expandAVX512_26_outShufHi0(SB), Z5
1400 VMOVDQU64 expandAVX512_26_outShufHi1(SB), Z6
1401 VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes
1402 VPERMB Z7, Z0, Z0 // pass 0: gather input bytes by inShuf0
1403 VGF2P8AFFINEQB $0, expandAVX512_26_mat0<>(SB), Z0, Z0 // Z0 = pass 0 result
1404 VPERMB Z7, Z2, Z2
1405 VGF2P8AFFINEQB $0, expandAVX512_26_mat1<>(SB), Z2, Z2 // Z2 = pass 1 result
1406 VPERMB Z7, Z3, Z3
1407 VGF2P8AFFINEQB $0, expandAVX512_26_mat2<>(SB), Z3, Z3 // Z3 = pass 2 result
1408 VPERMB Z7, Z4, Z4
1409 VGF2P8AFFINEQB $0, expandAVX512_26_mat3<>(SB), Z4, Z4 // Z4 = pass 3 result
1410 VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from Z0/Z2 by outShufLo
1411 MOVQ $0xff7c07ffff01ffff, AX // lanes taken from Z2/Z3; AX no longer needed as the input pointer
1412 KMOVQ AX, K1
1413 VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from Z2/Z3 by outShufHi0, other lanes zeroed
1414 MOVQ $0x83f80000fe0000, AX // complement of the mask above: lanes taken from Z4
1415 KMOVQ AX, K1
1416 VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes picked from Z4 by outShufHi1, other lanes zeroed
1417 VPORQ Z0, Z5, Z2 // Z2 = merge of the two masked shuffles
1418 RET
1419
// expandAVX512_28_inShuf0 is the VPERMB index vector for the first gather in
// expandAVX512_28: each byte names the input byte copied into that lane before
// the mat0 affine step. It reads input bytes 0, 1 and 2.
// NOTE(review): 0xff entries look like filler for lanes no outShuf table reads
// -- confirm against mkasm.go.
1420GLOBL expandAVX512_28_inShuf0<>(SB), RODATA, $0x40
1421DATA expandAVX512_28_inShuf0<>+0x00(SB)/8, $0x0202010101000000
1422DATA expandAVX512_28_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
1423DATA expandAVX512_28_inShuf0<>+0x10(SB)/8, $0x0202010101000000
1424DATA expandAVX512_28_inShuf0<>+0x18(SB)/8, $0xff02010101000000
1425DATA expandAVX512_28_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
1426DATA expandAVX512_28_inShuf0<>+0x28(SB)/8, $0xffff010101000000
1427DATA expandAVX512_28_inShuf0<>+0x30(SB)/8, $0xffff010101000000
1428DATA expandAVX512_28_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
1429
// expandAVX512_28_mat0 holds the per-qword 8x8 GF(2) bit-matrices for the first
// VGF2P8AFFINEQB in expandAVX512_28.
// A qword of one repeated byte 1<<k turns each source byte into 0x00/0xff
// according to its bit k. The half-and-half qwords (e.g. 0x0101010102020202)
// take the low 4 result bits from one input bit and the high 4 from the next.
1430GLOBL expandAVX512_28_mat0<>(SB), RODATA, $0x40
1431DATA expandAVX512_28_mat0<>+0x00(SB)/8, $0x0101010101010101
1432DATA expandAVX512_28_mat0<>+0x08(SB)/8, $0x0101010102020202
1433DATA expandAVX512_28_mat0<>+0x10(SB)/8, $0x0202020202020202
1434DATA expandAVX512_28_mat0<>+0x18(SB)/8, $0x0404040404040404
1435DATA expandAVX512_28_mat0<>+0x20(SB)/8, $0x0404040408080808
1436DATA expandAVX512_28_mat0<>+0x28(SB)/8, $0x0808080808080808
1437DATA expandAVX512_28_mat0<>+0x30(SB)/8, $0x1010101010101010
1438DATA expandAVX512_28_mat0<>+0x38(SB)/8, $0x1010101020202020
1439
// expandAVX512_28_inShuf1 is the VPERMB index vector for the second gather in
// expandAVX512_28. Qwords 0-3 read input bytes 0 and 1, qwords 4-5 read byte 2
// once each, and qwords 6-7 read bytes 2, 3 and 4.
1440GLOBL expandAVX512_28_inShuf1<>(SB), RODATA, $0x40
1441DATA expandAVX512_28_inShuf1<>+0x00(SB)/8, $0xffff010101000000
1442DATA expandAVX512_28_inShuf1<>+0x08(SB)/8, $0xffff010101000000
1443DATA expandAVX512_28_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
1444DATA expandAVX512_28_inShuf1<>+0x18(SB)/8, $0xffff010101000000
1445DATA expandAVX512_28_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
1446DATA expandAVX512_28_inShuf1<>+0x28(SB)/8, $0xffffffffffffff02
1447DATA expandAVX512_28_inShuf1<>+0x30(SB)/8, $0x0404040303030202
1448DATA expandAVX512_28_inShuf1<>+0x38(SB)/8, $0xffffffffff040302
1449
// expandAVX512_28_mat1 holds the bit-matrices for the second VGF2P8AFFINEQB in
// expandAVX512_28: rows for input bits 5..7 in qwords 0-3, then rows for bits
// 0, 1, 2 and a bit-2/bit-3 half-and-half row in qwords 4-7.
1450GLOBL expandAVX512_28_mat1<>(SB), RODATA, $0x40
1451DATA expandAVX512_28_mat1<>+0x00(SB)/8, $0x2020202020202020
1452DATA expandAVX512_28_mat1<>+0x08(SB)/8, $0x4040404040404040
1453DATA expandAVX512_28_mat1<>+0x10(SB)/8, $0x4040404080808080
1454DATA expandAVX512_28_mat1<>+0x18(SB)/8, $0x8080808080808080
1455DATA expandAVX512_28_mat1<>+0x20(SB)/8, $0x0101010101010101
1456DATA expandAVX512_28_mat1<>+0x28(SB)/8, $0x0202020202020202
1457DATA expandAVX512_28_mat1<>+0x30(SB)/8, $0x0404040404040404
1458DATA expandAVX512_28_mat1<>+0x38(SB)/8, $0x0404040408080808
1459
// expandAVX512_28_inShuf2 is the VPERMB index vector for the third gather in
// expandAVX512_28; it reads input bytes 2, 3 and 4 (0xff lanes aside).
1460GLOBL expandAVX512_28_inShuf2<>(SB), RODATA, $0x40
1461DATA expandAVX512_28_inShuf2<>+0x00(SB)/8, $0x0404030303020202
1462DATA expandAVX512_28_inShuf2<>+0x08(SB)/8, $0x0404030303020202
1463DATA expandAVX512_28_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
1464DATA expandAVX512_28_inShuf2<>+0x18(SB)/8, $0xffff030303020202
1465DATA expandAVX512_28_inShuf2<>+0x20(SB)/8, $0xffff030303020202
1466DATA expandAVX512_28_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
1467DATA expandAVX512_28_inShuf2<>+0x30(SB)/8, $0xffff030303020202
1468DATA expandAVX512_28_inShuf2<>+0x38(SB)/8, $0xffff040404030303
1469
// expandAVX512_28_mat2 holds the bit-matrices for the third VGF2P8AFFINEQB in
// expandAVX512_28: rows for input bits 3..7 (with half-and-half rows for the
// 4/5 and 6/7 pairs), then a bit-0 row (0x01) in qword 7.
1470GLOBL expandAVX512_28_mat2<>(SB), RODATA, $0x40
1471DATA expandAVX512_28_mat2<>+0x00(SB)/8, $0x0808080808080808
1472DATA expandAVX512_28_mat2<>+0x08(SB)/8, $0x1010101010101010
1473DATA expandAVX512_28_mat2<>+0x10(SB)/8, $0x1010101020202020
1474DATA expandAVX512_28_mat2<>+0x18(SB)/8, $0x2020202020202020
1475DATA expandAVX512_28_mat2<>+0x20(SB)/8, $0x4040404040404040
1476DATA expandAVX512_28_mat2<>+0x28(SB)/8, $0x4040404080808080
1477DATA expandAVX512_28_mat2<>+0x30(SB)/8, $0x8080808080808080
1478DATA expandAVX512_28_mat2<>+0x38(SB)/8, $0x0101010101010101
1479
// expandAVX512_28_inShuf3 is the VPERMB index vector for the fourth gather in
// expandAVX512_28. Only qwords 0-2 carry indices (input bytes 3 and 4);
// qwords 3-7 are entirely 0xff and pair with the zero qwords of mat3.
1480GLOBL expandAVX512_28_inShuf3<>(SB), RODATA, $0x40
1481DATA expandAVX512_28_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
1482DATA expandAVX512_28_inShuf3<>+0x08(SB)/8, $0xffff040404030303
1483DATA expandAVX512_28_inShuf3<>+0x10(SB)/8, $0xffffffffffffff04
1484DATA expandAVX512_28_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
1485DATA expandAVX512_28_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
1486DATA expandAVX512_28_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
1487DATA expandAVX512_28_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1488DATA expandAVX512_28_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1489
// expandAVX512_28_mat3 holds the bit-matrices for the fourth VGF2P8AFFINEQB in
// expandAVX512_28: a bit-0/bit-1 half-and-half row, a bit-1 row and a bit-3 row
// (0x08); qwords 3-7 are zero matrices and yield zero result bytes.
1490GLOBL expandAVX512_28_mat3<>(SB), RODATA, $0x40
1491DATA expandAVX512_28_mat3<>+0x00(SB)/8, $0x0101010102020202
1492DATA expandAVX512_28_mat3<>+0x08(SB)/8, $0x0202020202020202
1493DATA expandAVX512_28_mat3<>+0x10(SB)/8, $0x0808080808080808
1494DATA expandAVX512_28_mat3<>+0x18(SB)/8, $0x0000000000000000
1495DATA expandAVX512_28_mat3<>+0x20(SB)/8, $0x0000000000000000
1496DATA expandAVX512_28_mat3<>+0x28(SB)/8, $0x0000000000000000
1497DATA expandAVX512_28_mat3<>+0x30(SB)/8, $0x0000000000000000
1498DATA expandAVX512_28_mat3<>+0x38(SB)/8, $0x0000000000000000
1499
// expandAVX512_28_outShufLo is the index vector for the unmasked VPERMI2B that
// builds Z1 in expandAVX512_28. Indices 0x00-0x3f select bytes of the pass-0
// result (Z0); indices 0x40-0x7f select bytes of the pass-1 result (Z2).
1500GLOBL expandAVX512_28_outShufLo(SB), RODATA, $0x40
1501DATA expandAVX512_28_outShufLo+0x00(SB)/8, $0x1812111008020100
1502DATA expandAVX512_28_outShufLo+0x08(SB)/8, $0x31302a2928201a19
1503DATA expandAVX512_28_outShufLo+0x10(SB)/8, $0x4a49484241403832
1504DATA expandAVX512_28_outShufLo+0x18(SB)/8, $0x090504035a595850
1505DATA expandAVX512_28_outShufLo+0x20(SB)/8, $0x2b211d1c1b151413
1506DATA expandAVX512_28_outShufLo+0x28(SB)/8, $0x4443393534332d2c
1507DATA expandAVX512_28_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b45
1508DATA expandAVX512_28_outShufLo+0x38(SB)/8, $0x1e6817160a600706
1509
// expandAVX512_28_outShufHi0 is the index vector for the K1-masked VPERMI2B.Z in
// expandAVX512_28 (indices 0x00-0x3f select from Z2, 0x40-0x7f from Z3).
// Its 0xff entries sit exactly in the lanes cleared by mask 0xdf87fffff87fffff;
// those lanes are supplied through outShufHi1 instead.
1510GLOBL expandAVX512_28_outShufHi0(SB), RODATA, $0x40
1511DATA expandAVX512_28_outShufHi0+0x00(SB)/8, $0x4948424140383130
1512DATA expandAVX512_28_outShufHi0+0x08(SB)/8, $0x6261605a5958504a
1513DATA expandAVX512_28_outShufHi0+0x10(SB)/8, $0xff7a797872717068
1514DATA expandAVX512_28_outShufHi0+0x18(SB)/8, $0x4339343332ffffff
1515DATA expandAVX512_28_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b4544
1516DATA expandAVX512_28_outShufHi0+0x28(SB)/8, $0x757473696564635d
1517DATA expandAVX512_28_outShufHi0+0x30(SB)/8, $0x35ffffffff7d7c7b
1518DATA expandAVX512_28_outShufHi0+0x38(SB)/8, $0x4f4eff47463a3736
1519
// expandAVX512_28_outShufHi1 is the index vector for the K1-masked VPERMB.Z over
// the pass-3 result (Z4) in expandAVX512_28. Real indices appear only in the
// lanes enabled by mask 0x2078000007800000; every other lane is 0xff.
1520GLOBL expandAVX512_28_outShufHi1(SB), RODATA, $0x40
1521DATA expandAVX512_28_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1522DATA expandAVX512_28_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1523DATA expandAVX512_28_outShufHi1+0x10(SB)/8, $0x00ffffffffffffff
1524DATA expandAVX512_28_outShufHi1+0x18(SB)/8, $0xffffffffff0a0908
1525DATA expandAVX512_28_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1526DATA expandAVX512_28_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
1527DATA expandAVX512_28_outShufHi1+0x30(SB)/8, $0xff0d0c0b01ffffff
1528DATA expandAVX512_28_outShufHi1+0x38(SB)/8, $0xffff10ffffffffff
1529
// expandAVX512_28 loads 64 bytes from the address in AX, runs four
// VPERMB + VGF2P8AFFINEQB passes over them (tables inShuf0-3 / mat0-3), and
// shuffles the pass results into two 64-byte vectors left in Z1 and Z2
// (presumably the low and high halves of the expanded output, going by the
// outShufLo/outShufHi* table names).
// Overwrites Z0-Z7, AX and K1. Declared NOSPLIT with frame $0-0.
1530TEXT expandAVX512_28<>(SB), NOSPLIT, $0-0
1531 VMOVDQU64 expandAVX512_28_inShuf0<>(SB), Z0
1532 VMOVDQU64 expandAVX512_28_inShuf1<>(SB), Z2
1533 VMOVDQU64 expandAVX512_28_inShuf2<>(SB), Z3
1534 VMOVDQU64 expandAVX512_28_inShuf3<>(SB), Z4
1535 VMOVDQU64 expandAVX512_28_outShufLo(SB), Z1
1536 VMOVDQU64 expandAVX512_28_outShufHi0(SB), Z5
1537 VMOVDQU64 expandAVX512_28_outShufHi1(SB), Z6
1538 VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes
1539 VPERMB Z7, Z0, Z0 // pass 0: gather input bytes by inShuf0
1540 VGF2P8AFFINEQB $0, expandAVX512_28_mat0<>(SB), Z0, Z0 // Z0 = pass 0 result
1541 VPERMB Z7, Z2, Z2
1542 VGF2P8AFFINEQB $0, expandAVX512_28_mat1<>(SB), Z2, Z2 // Z2 = pass 1 result
1543 VPERMB Z7, Z3, Z3
1544 VGF2P8AFFINEQB $0, expandAVX512_28_mat2<>(SB), Z3, Z3 // Z3 = pass 2 result
1545 VPERMB Z7, Z4, Z4
1546 VGF2P8AFFINEQB $0, expandAVX512_28_mat3<>(SB), Z4, Z4 // Z4 = pass 3 result
1547 VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from Z0/Z2 by outShufLo
1548 MOVQ $0xdf87fffff87fffff, AX // lanes taken from Z2/Z3; AX no longer needed as the input pointer
1549 KMOVQ AX, K1
1550 VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from Z2/Z3 by outShufHi0, other lanes zeroed
1551 MOVQ $0x2078000007800000, AX // complement of the mask above: lanes taken from Z4
1552 KMOVQ AX, K1
1553 VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes picked from Z4 by outShufHi1, other lanes zeroed
1554 VPORQ Z0, Z5, Z2 // Z2 = merge of the two masked shuffles
1555 RET
1556
// expandAVX512_30_inShuf0 is the VPERMB index vector for the first gather in
// expandAVX512_30: each byte names the input byte copied into that lane before
// the mat0 affine step. Qwords 0-1 read input bytes 0, 1 and 2; qwords 2-7
// read bytes 0 and 1 only.
// NOTE(review): 0xff entries look like filler for lanes no outShuf table reads
// -- confirm against mkasm.go.
1557GLOBL expandAVX512_30_inShuf0<>(SB), RODATA, $0x40
1558DATA expandAVX512_30_inShuf0<>+0x00(SB)/8, $0x0202010101000000
1559DATA expandAVX512_30_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
1560DATA expandAVX512_30_inShuf0<>+0x10(SB)/8, $0xffff010101000000
1561DATA expandAVX512_30_inShuf0<>+0x18(SB)/8, $0xffffffffffff0100
1562DATA expandAVX512_30_inShuf0<>+0x20(SB)/8, $0xffff010101000000
1563DATA expandAVX512_30_inShuf0<>+0x28(SB)/8, $0xffffffffffff0100
1564DATA expandAVX512_30_inShuf0<>+0x30(SB)/8, $0xffff010101000000
1565DATA expandAVX512_30_inShuf0<>+0x38(SB)/8, $0xffff010101000000
1566
// expandAVX512_30_mat0 holds the per-qword 8x8 GF(2) bit-matrices for the first
// VGF2P8AFFINEQB in expandAVX512_30.
// A qword of one repeated byte 1<<k turns each source byte into 0x00/0xff
// according to its bit k. A qword mixing two values builds each result byte from
// two input bits: e.g. 0x0101010101010202 takes the low 6 result bits from input
// bit 0 and the upper 2 from input bit 1.
1567GLOBL expandAVX512_30_mat0<>(SB), RODATA, $0x40
1568DATA expandAVX512_30_mat0<>+0x00(SB)/8, $0x0101010101010101
1569DATA expandAVX512_30_mat0<>+0x08(SB)/8, $0x0101010101010202
1570DATA expandAVX512_30_mat0<>+0x10(SB)/8, $0x0202020202020202
1571DATA expandAVX512_30_mat0<>+0x18(SB)/8, $0x0202020204040404
1572DATA expandAVX512_30_mat0<>+0x20(SB)/8, $0x0404040404040404
1573DATA expandAVX512_30_mat0<>+0x28(SB)/8, $0x0404080808080808
1574DATA expandAVX512_30_mat0<>+0x30(SB)/8, $0x0808080808080808
1575DATA expandAVX512_30_mat0<>+0x38(SB)/8, $0x1010101010101010
1576
// expandAVX512_30_inShuf1 is the VPERMB index vector for the second gather in
// expandAVX512_30. Qwords 0-5 read input bytes 0 and 1, qword 6 reads byte 2
// once, and qword 7 reads bytes 2,2,2,3,3,3,4,4 (low byte first).
1577GLOBL expandAVX512_30_inShuf1<>(SB), RODATA, $0x40
1578DATA expandAVX512_30_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
1579DATA expandAVX512_30_inShuf1<>+0x08(SB)/8, $0xffff010101000000
1580DATA expandAVX512_30_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
1581DATA expandAVX512_30_inShuf1<>+0x18(SB)/8, $0xffff010101000000
1582DATA expandAVX512_30_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
1583DATA expandAVX512_30_inShuf1<>+0x28(SB)/8, $0xffff010101000000
1584DATA expandAVX512_30_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
1585DATA expandAVX512_30_inShuf1<>+0x38(SB)/8, $0x0404030303020202
1586
// expandAVX512_30_mat1 holds the bit-matrices for the second VGF2P8AFFINEQB in
// expandAVX512_30: rows for input bits 4..7 (pure and two-bit mixed) in
// qwords 0-5, then a bit-0 row (0x01) and a bit-1 row (0x02).
1587GLOBL expandAVX512_30_mat1<>(SB), RODATA, $0x40
1588DATA expandAVX512_30_mat1<>+0x00(SB)/8, $0x1010101010102020
1589DATA expandAVX512_30_mat1<>+0x08(SB)/8, $0x2020202020202020
1590DATA expandAVX512_30_mat1<>+0x10(SB)/8, $0x2020202040404040
1591DATA expandAVX512_30_mat1<>+0x18(SB)/8, $0x4040404040404040
1592DATA expandAVX512_30_mat1<>+0x20(SB)/8, $0x4040808080808080
1593DATA expandAVX512_30_mat1<>+0x28(SB)/8, $0x8080808080808080
1594DATA expandAVX512_30_mat1<>+0x30(SB)/8, $0x0101010101010101
1595DATA expandAVX512_30_mat1<>+0x38(SB)/8, $0x0202020202020202
1596
// expandAVX512_30_inShuf2 is the VPERMB index vector for the third gather in
// expandAVX512_30. Qword 0 reads input bytes 2, 3 and 4 once each; qwords 1-7
// read bytes 2 and 3 only.
1597GLOBL expandAVX512_30_inShuf2<>(SB), RODATA, $0x40
1598DATA expandAVX512_30_inShuf2<>+0x00(SB)/8, $0xffffffffff040302
1599DATA expandAVX512_30_inShuf2<>+0x08(SB)/8, $0xffff030303020202
1600DATA expandAVX512_30_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
1601DATA expandAVX512_30_inShuf2<>+0x18(SB)/8, $0xffff030303020202
1602DATA expandAVX512_30_inShuf2<>+0x20(SB)/8, $0xffff030303020202
1603DATA expandAVX512_30_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
1604DATA expandAVX512_30_inShuf2<>+0x30(SB)/8, $0xffff030303020202
1605DATA expandAVX512_30_inShuf2<>+0x38(SB)/8, $0xffffffffffff0302
1606
// expandAVX512_30_mat2 holds the bit-matrices for the third VGF2P8AFFINEQB in
// expandAVX512_30: it runs from a bit-1/bit-2 mixed row through pure and mixed
// rows up to a bit-5/bit-6 mixed row in qword 7.
1607GLOBL expandAVX512_30_mat2<>(SB), RODATA, $0x40
1608DATA expandAVX512_30_mat2<>+0x00(SB)/8, $0x0202020204040404
1609DATA expandAVX512_30_mat2<>+0x08(SB)/8, $0x0404040404040404
1610DATA expandAVX512_30_mat2<>+0x10(SB)/8, $0x0404080808080808
1611DATA expandAVX512_30_mat2<>+0x18(SB)/8, $0x0808080808080808
1612DATA expandAVX512_30_mat2<>+0x20(SB)/8, $0x1010101010101010
1613DATA expandAVX512_30_mat2<>+0x28(SB)/8, $0x1010101010102020
1614DATA expandAVX512_30_mat2<>+0x30(SB)/8, $0x2020202020202020
1615DATA expandAVX512_30_mat2<>+0x38(SB)/8, $0x2020202040404040
1616
// expandAVX512_30_inShuf3 is the VPERMB index vector for the fourth gather in
// expandAVX512_30. Qwords 0-2 read input bytes 2 and 3, qwords 3-5 read bytes
// 3 and 4; qwords 6-7 are entirely 0xff and pair with the zero qwords of mat3.
1617GLOBL expandAVX512_30_inShuf3<>(SB), RODATA, $0x40
1618DATA expandAVX512_30_inShuf3<>+0x00(SB)/8, $0xffff030303020202
1619DATA expandAVX512_30_inShuf3<>+0x08(SB)/8, $0xffffffffffff0302
1620DATA expandAVX512_30_inShuf3<>+0x10(SB)/8, $0xffff030303020202
1621DATA expandAVX512_30_inShuf3<>+0x18(SB)/8, $0xffff040404030303
1622DATA expandAVX512_30_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
1623DATA expandAVX512_30_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
1624DATA expandAVX512_30_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1625DATA expandAVX512_30_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1626
// expandAVX512_30_mat3 holds the bit-matrices for the fourth VGF2P8AFFINEQB in
// expandAVX512_30: rows for input bits 6 and 7 in qwords 0-2, rows for bits 0
// and 1 in qwords 3-5, and zero matrices (zero result bytes) in qwords 6-7.
1627GLOBL expandAVX512_30_mat3<>(SB), RODATA, $0x40
1628DATA expandAVX512_30_mat3<>+0x00(SB)/8, $0x4040404040404040
1629DATA expandAVX512_30_mat3<>+0x08(SB)/8, $0x4040808080808080
1630DATA expandAVX512_30_mat3<>+0x10(SB)/8, $0x8080808080808080
1631DATA expandAVX512_30_mat3<>+0x18(SB)/8, $0x0101010101010101
1632DATA expandAVX512_30_mat3<>+0x20(SB)/8, $0x0101010101010202
1633DATA expandAVX512_30_mat3<>+0x28(SB)/8, $0x0202020202020202
1634DATA expandAVX512_30_mat3<>+0x30(SB)/8, $0x0000000000000000
1635DATA expandAVX512_30_mat3<>+0x38(SB)/8, $0x0000000000000000
1636
// expandAVX512_30_outShufLo is the index vector for the unmasked VPERMI2B that
// builds Z1 in expandAVX512_30. Indices 0x00-0x3f select bytes of the pass-0
// result (Z0); indices 0x40-0x7f select bytes of the pass-1 result (Z2).
1637GLOBL expandAVX512_30_outShufLo(SB), RODATA, $0x40
1638DATA expandAVX512_30_outShufLo+0x00(SB)/8, $0x1812111008020100
1639DATA expandAVX512_30_outShufLo+0x08(SB)/8, $0x3832313028222120
1640DATA expandAVX512_30_outShufLo+0x10(SB)/8, $0x58504a4948403a39
1641DATA expandAVX512_30_outShufLo+0x18(SB)/8, $0x04036a6968605a59
1642DATA expandAVX512_30_outShufLo+0x20(SB)/8, $0x2423191514130905
1643DATA expandAVX512_30_outShufLo+0x28(SB)/8, $0x3d3c3b3534332925
1644DATA expandAVX512_30_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b41
1645DATA expandAVX512_30_outShufLo+0x38(SB)/8, $0x0a7007066d6c6b61
1646
// expandAVX512_30_outShufHi0 is the index vector for the K1-masked VPERMI2B.Z in
// expandAVX512_30 (indices 0x00-0x3f select from Z2, 0x40-0x7f from Z3).
// Its 0xff entries sit exactly in the lanes cleared by mask 0xb001ffffc007ffff;
// those lanes are supplied through outShufHi1 instead.
1647GLOBL expandAVX512_30_outShufHi0(SB), RODATA, $0x40
1648DATA expandAVX512_30_outShufHi0+0x00(SB)/8, $0x504a4948403a3938
1649DATA expandAVX512_30_outShufHi0+0x08(SB)/8, $0x70686261605a5958
1650DATA expandAVX512_30_outShufHi0+0x10(SB)/8, $0xffffffffff787271
1651DATA expandAVX512_30_outShufHi0+0x18(SB)/8, $0x3c3bffffffffffff
1652DATA expandAVX512_30_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b413d
1653DATA expandAVX512_30_outShufHi0+0x28(SB)/8, $0x757473696564635d
1654DATA expandAVX512_30_outShufHi0+0x30(SB)/8, $0xffffffffffffff79
1655DATA expandAVX512_30_outShufHi0+0x38(SB)/8, $0x42ff3f3effffffff
1656
// expandAVX512_30_outShufHi1 is the index vector for the K1-masked VPERMB.Z over
// the pass-3 result (Z4) in expandAVX512_30. Real indices appear only in the
// lanes enabled by mask 0x4ffe00003ff80000; every other lane is 0xff.
1657GLOBL expandAVX512_30_outShufHi1(SB), RODATA, $0x40
1658DATA expandAVX512_30_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1659DATA expandAVX512_30_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1660DATA expandAVX512_30_outShufHi1+0x10(SB)/8, $0x1008020100ffffff
1661DATA expandAVX512_30_outShufHi1+0x18(SB)/8, $0xffff201a19181211
1662DATA expandAVX512_30_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1663DATA expandAVX512_30_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
1664DATA expandAVX512_30_outShufHi1+0x30(SB)/8, $0x15141309050403ff
1665DATA expandAVX512_30_outShufHi1+0x38(SB)/8, $0xff28ffff211d1c1b
1666
// expandAVX512_30 loads 64 bytes from the address in AX, runs four
// VPERMB + VGF2P8AFFINEQB passes over them (tables inShuf0-3 / mat0-3), and
// shuffles the pass results into two 64-byte vectors left in Z1 and Z2
// (presumably the low and high halves of the expanded output, going by the
// outShufLo/outShufHi* table names).
// Overwrites Z0-Z7, AX and K1. Declared NOSPLIT with frame $0-0.
1667TEXT expandAVX512_30<>(SB), NOSPLIT, $0-0
1668 VMOVDQU64 expandAVX512_30_inShuf0<>(SB), Z0
1669 VMOVDQU64 expandAVX512_30_inShuf1<>(SB), Z2
1670 VMOVDQU64 expandAVX512_30_inShuf2<>(SB), Z3
1671 VMOVDQU64 expandAVX512_30_inShuf3<>(SB), Z4
1672 VMOVDQU64 expandAVX512_30_outShufLo(SB), Z1
1673 VMOVDQU64 expandAVX512_30_outShufHi0(SB), Z5
1674 VMOVDQU64 expandAVX512_30_outShufHi1(SB), Z6
1675 VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes
1676 VPERMB Z7, Z0, Z0 // pass 0: gather input bytes by inShuf0
1677 VGF2P8AFFINEQB $0, expandAVX512_30_mat0<>(SB), Z0, Z0 // Z0 = pass 0 result
1678 VPERMB Z7, Z2, Z2
1679 VGF2P8AFFINEQB $0, expandAVX512_30_mat1<>(SB), Z2, Z2 // Z2 = pass 1 result
1680 VPERMB Z7, Z3, Z3
1681 VGF2P8AFFINEQB $0, expandAVX512_30_mat2<>(SB), Z3, Z3 // Z3 = pass 2 result
1682 VPERMB Z7, Z4, Z4
1683 VGF2P8AFFINEQB $0, expandAVX512_30_mat3<>(SB), Z4, Z4 // Z4 = pass 3 result
1684 VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from Z0/Z2 by outShufLo
1685 MOVQ $0xb001ffffc007ffff, AX // lanes taken from Z2/Z3; AX no longer needed as the input pointer
1686 KMOVQ AX, K1
1687 VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from Z2/Z3 by outShufHi0, other lanes zeroed
1688 MOVQ $0x4ffe00003ff80000, AX // complement of the mask above: lanes taken from Z4
1689 KMOVQ AX, K1
1690 VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes picked from Z4 by outShufHi1, other lanes zeroed
1691 VPORQ Z0, Z5, Z2 // Z2 = merge of the two masked shuffles
1692 RET
1693
// expandAVX512_32_inShuf0 is the VPERMB index vector for the first gather in
// expandAVX512_32. Every qword is identical: low byte first it reads input
// bytes 0,0,0,0,1,1,1,1.
1694GLOBL expandAVX512_32_inShuf0<>(SB), RODATA, $0x40
1695DATA expandAVX512_32_inShuf0<>+0x00(SB)/8, $0x0101010100000000
1696DATA expandAVX512_32_inShuf0<>+0x08(SB)/8, $0x0101010100000000
1697DATA expandAVX512_32_inShuf0<>+0x10(SB)/8, $0x0101010100000000
1698DATA expandAVX512_32_inShuf0<>+0x18(SB)/8, $0x0101010100000000
1699DATA expandAVX512_32_inShuf0<>+0x20(SB)/8, $0x0101010100000000
1700DATA expandAVX512_32_inShuf0<>+0x28(SB)/8, $0x0101010100000000
1701DATA expandAVX512_32_inShuf0<>+0x30(SB)/8, $0x0101010100000000
1702DATA expandAVX512_32_inShuf0<>+0x38(SB)/8, $0x0101010100000000
1703
// expandAVX512_32_mat0 holds the per-qword 8x8 GF(2) bit-matrices used by both
// VGF2P8AFFINEQB steps in expandAVX512_32. Qword k repeats the byte 1<<k, so
// each result byte in qword k becomes 0x00 or 0xff according to bit k of its
// source byte.
1704GLOBL expandAVX512_32_mat0<>(SB), RODATA, $0x40
1705DATA expandAVX512_32_mat0<>+0x00(SB)/8, $0x0101010101010101
1706DATA expandAVX512_32_mat0<>+0x08(SB)/8, $0x0202020202020202
1707DATA expandAVX512_32_mat0<>+0x10(SB)/8, $0x0404040404040404
1708DATA expandAVX512_32_mat0<>+0x18(SB)/8, $0x0808080808080808
1709DATA expandAVX512_32_mat0<>+0x20(SB)/8, $0x1010101010101010
1710DATA expandAVX512_32_mat0<>+0x28(SB)/8, $0x2020202020202020
1711DATA expandAVX512_32_mat0<>+0x30(SB)/8, $0x4040404040404040
1712DATA expandAVX512_32_mat0<>+0x38(SB)/8, $0x8080808080808080
1713
// expandAVX512_32_inShuf1 is the VPERMB index vector for the second gather in
// expandAVX512_32. Every qword is identical: low byte first it reads input
// bytes 2,2,2,2,3,3,3,3.
1714GLOBL expandAVX512_32_inShuf1<>(SB), RODATA, $0x40
1715DATA expandAVX512_32_inShuf1<>+0x00(SB)/8, $0x0303030302020202
1716DATA expandAVX512_32_inShuf1<>+0x08(SB)/8, $0x0303030302020202
1717DATA expandAVX512_32_inShuf1<>+0x10(SB)/8, $0x0303030302020202
1718DATA expandAVX512_32_inShuf1<>+0x18(SB)/8, $0x0303030302020202
1719DATA expandAVX512_32_inShuf1<>+0x20(SB)/8, $0x0303030302020202
1720DATA expandAVX512_32_inShuf1<>+0x28(SB)/8, $0x0303030302020202
1721DATA expandAVX512_32_inShuf1<>+0x30(SB)/8, $0x0303030302020202
1722DATA expandAVX512_32_inShuf1<>+0x38(SB)/8, $0x0303030302020202
1723
// expandAVX512_32_outShufLo is the VPERMB index vector that expandAVX512_32
// applies to each of its two affine results separately. All indices are in
// 0x00-0x3f. The first 32 lanes take the low four bytes of each qword in turn
// (0x00-0x03, 0x08-0x0b, ...); the last 32 lanes take the high four bytes
// (0x04-0x07, 0x0c-0x0f, ...).
1724GLOBL expandAVX512_32_outShufLo(SB), RODATA, $0x40
1725DATA expandAVX512_32_outShufLo+0x00(SB)/8, $0x0b0a090803020100
1726DATA expandAVX512_32_outShufLo+0x08(SB)/8, $0x1b1a191813121110
1727DATA expandAVX512_32_outShufLo+0x10(SB)/8, $0x2b2a292823222120
1728DATA expandAVX512_32_outShufLo+0x18(SB)/8, $0x3b3a393833323130
1729DATA expandAVX512_32_outShufLo+0x20(SB)/8, $0x0f0e0d0c07060504
1730DATA expandAVX512_32_outShufLo+0x28(SB)/8, $0x1f1e1d1c17161514
1731DATA expandAVX512_32_outShufLo+0x30(SB)/8, $0x2f2e2d2c27262524
1732DATA expandAVX512_32_outShufLo+0x38(SB)/8, $0x3f3e3d3c37363534
1733
// expandAVX512_32 loads 64 bytes from the address in AX, runs two
// VPERMB + VGF2P8AFFINEQB passes that share one matrix table (mat0), and
// reorders each pass result with the same outShufLo index vector. The two
// 64-byte results are left in Z1 and Z2 (presumably the low and high halves of
// the expanded output).
// Overwrites Z0-Z4; AX is only read. Declared NOSPLIT with frame $0-0.
1734TEXT expandAVX512_32<>(SB), NOSPLIT, $0-0
1735 VMOVDQU64 expandAVX512_32_inShuf0<>(SB), Z0
1736 VMOVDQU64 expandAVX512_32_mat0<>(SB), Z1 // matrix kept in a register: both passes use it
1737 VMOVDQU64 expandAVX512_32_inShuf1<>(SB), Z2
1738 VMOVDQU64 expandAVX512_32_outShufLo(SB), Z3
1739 VMOVDQU64 (AX), Z4 // Z4 = 64 input bytes
1740 VPERMB Z4, Z0, Z0 // pass 0: gather input bytes by inShuf0
1741 VGF2P8AFFINEQB $0, Z1, Z0, Z0 // Z0 = pass 0 result
1742 VPERMB Z4, Z2, Z2 // pass 1: gather by inShuf1
1743 VGF2P8AFFINEQB $0, Z1, Z2, Z2 // Z2 = pass 1 result
1744 VPERMB Z0, Z3, Z1 // Z1 = pass 0 result reordered by outShufLo (matrix in Z1 no longer needed)
1745 VPERMB Z2, Z3, Z2 // Z2 = pass 1 result reordered by the same indices
1746 RET
1747
// expandAVX512_36_inShuf0 is the VPERMB index vector for the first gather in
// expandAVX512_36. It reads input bytes 0 and 1: either four copies of each
// (0x0101010100000000) or one copy of each followed by 0xff lanes.
// NOTE(review): 0xff entries look like filler for lanes no outShuf table reads
// -- confirm against mkasm.go.
1748GLOBL expandAVX512_36_inShuf0<>(SB), RODATA, $0x40
1749DATA expandAVX512_36_inShuf0<>+0x00(SB)/8, $0x0101010100000000
1750DATA expandAVX512_36_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
1751DATA expandAVX512_36_inShuf0<>+0x10(SB)/8, $0x0101010100000000
1752DATA expandAVX512_36_inShuf0<>+0x18(SB)/8, $0x0101010100000000
1753DATA expandAVX512_36_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
1754DATA expandAVX512_36_inShuf0<>+0x28(SB)/8, $0x0101010100000000
1755DATA expandAVX512_36_inShuf0<>+0x30(SB)/8, $0x0101010100000000
1756DATA expandAVX512_36_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
1757
// expandAVX512_36_mat0 holds the per-qword 8x8 GF(2) bit-matrices for the first
// VGF2P8AFFINEQB in expandAVX512_36.
// A qword of one repeated byte 1<<k turns each source byte into 0x00/0xff
// according to its bit k. The half-and-half qwords (e.g. 0x0101010102020202)
// take the low 4 result bits from one input bit and the high 4 from the next.
1758GLOBL expandAVX512_36_mat0<>(SB), RODATA, $0x40
1759DATA expandAVX512_36_mat0<>+0x00(SB)/8, $0x0101010101010101
1760DATA expandAVX512_36_mat0<>+0x08(SB)/8, $0x0101010102020202
1761DATA expandAVX512_36_mat0<>+0x10(SB)/8, $0x0202020202020202
1762DATA expandAVX512_36_mat0<>+0x18(SB)/8, $0x0404040404040404
1763DATA expandAVX512_36_mat0<>+0x20(SB)/8, $0x0404040408080808
1764DATA expandAVX512_36_mat0<>+0x28(SB)/8, $0x0808080808080808
1765DATA expandAVX512_36_mat0<>+0x30(SB)/8, $0x1010101010101010
1766DATA expandAVX512_36_mat0<>+0x38(SB)/8, $0x1010101020202020
1767
// expandAVX512_36_inShuf1 is the VPERMB index vector for the second gather in
// expandAVX512_36. Qwords 0-3 read input bytes 0 and 1, qwords 4-6 read bytes
// 1 and 2, and qword 7 reads bytes 2 and 3.
1768GLOBL expandAVX512_36_inShuf1<>(SB), RODATA, $0x40
1769DATA expandAVX512_36_inShuf1<>+0x00(SB)/8, $0x0101010100000000
1770DATA expandAVX512_36_inShuf1<>+0x08(SB)/8, $0xffffff0100000000
1771DATA expandAVX512_36_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
1772DATA expandAVX512_36_inShuf1<>+0x18(SB)/8, $0xffffffff00000000
1773DATA expandAVX512_36_inShuf1<>+0x20(SB)/8, $0xff02020202010101
1774DATA expandAVX512_36_inShuf1<>+0x28(SB)/8, $0xffffffffffff0201
1775DATA expandAVX512_36_inShuf1<>+0x30(SB)/8, $0x0202020201010101
1776DATA expandAVX512_36_inShuf1<>+0x38(SB)/8, $0x0303030302020202
1777
// expandAVX512_36_mat1 holds the bit-matrices for the second VGF2P8AFFINEQB in
// expandAVX512_36. The bit-6 / mixed 6-7 / bit-7 rows appear twice (qwords 1-3
// and 4-6), applied to the different input bytes that inShuf1 gathers for those
// qwords; qword 0 is a bit-5 row and qword 7 a bit-0 row.
1778GLOBL expandAVX512_36_mat1<>(SB), RODATA, $0x40
1779DATA expandAVX512_36_mat1<>+0x00(SB)/8, $0x2020202020202020
1780DATA expandAVX512_36_mat1<>+0x08(SB)/8, $0x4040404040404040
1781DATA expandAVX512_36_mat1<>+0x10(SB)/8, $0x4040404080808080
1782DATA expandAVX512_36_mat1<>+0x18(SB)/8, $0x8080808080808080
1783DATA expandAVX512_36_mat1<>+0x20(SB)/8, $0x4040404040404040
1784DATA expandAVX512_36_mat1<>+0x28(SB)/8, $0x4040404080808080
1785DATA expandAVX512_36_mat1<>+0x30(SB)/8, $0x8080808080808080
1786DATA expandAVX512_36_mat1<>+0x38(SB)/8, $0x0101010101010101
1787
// expandAVX512_36_inShuf2 is the VPERMB index vector for the third gather in
// expandAVX512_36; it reads input bytes 2 and 3 (qwords 6 and 7 read byte 2
// only).
1788GLOBL expandAVX512_36_inShuf2<>(SB), RODATA, $0x40
1789DATA expandAVX512_36_inShuf2<>+0x00(SB)/8, $0xffffffffffff0302
1790DATA expandAVX512_36_inShuf2<>+0x08(SB)/8, $0x0303030302020202
1791DATA expandAVX512_36_inShuf2<>+0x10(SB)/8, $0x0303030302020202
1792DATA expandAVX512_36_inShuf2<>+0x18(SB)/8, $0xffffffffffff0302
1793DATA expandAVX512_36_inShuf2<>+0x20(SB)/8, $0x0303030302020202
1794DATA expandAVX512_36_inShuf2<>+0x28(SB)/8, $0xffff030302020202
1795DATA expandAVX512_36_inShuf2<>+0x30(SB)/8, $0xffffffffffffff02
1796DATA expandAVX512_36_inShuf2<>+0x38(SB)/8, $0xffffffff02020202
1797
// expandAVX512_36_mat2 holds the bit-matrices for the third VGF2P8AFFINEQB in
// expandAVX512_36: it runs from a bit-0/bit-1 half-and-half row through pure
// and half-and-half rows up to a bit-5 row (0x20) in qword 7.
1798GLOBL expandAVX512_36_mat2<>(SB), RODATA, $0x40
1799DATA expandAVX512_36_mat2<>+0x00(SB)/8, $0x0101010102020202
1800DATA expandAVX512_36_mat2<>+0x08(SB)/8, $0x0202020202020202
1801DATA expandAVX512_36_mat2<>+0x10(SB)/8, $0x0404040404040404
1802DATA expandAVX512_36_mat2<>+0x18(SB)/8, $0x0404040408080808
1803DATA expandAVX512_36_mat2<>+0x20(SB)/8, $0x0808080808080808
1804DATA expandAVX512_36_mat2<>+0x28(SB)/8, $0x1010101010101010
1805DATA expandAVX512_36_mat2<>+0x30(SB)/8, $0x1010101020202020
1806DATA expandAVX512_36_mat2<>+0x38(SB)/8, $0x2020202020202020
1807
// expandAVX512_36_outShufLo is the index vector for the first VPERMI2B in
// expandAVX512_36, which builds Z1. Indices 0x00-0x3f select bytes of the
// pass-0 result (Z0); indices 0x40-0x7f select bytes of the pass-1 result (Z3).
1808GLOBL expandAVX512_36_outShufLo(SB), RODATA, $0x40
1809DATA expandAVX512_36_outShufLo+0x00(SB)/8, $0x1211100803020100
1810DATA expandAVX512_36_outShufLo+0x08(SB)/8, $0x2928201b1a191813
1811DATA expandAVX512_36_outShufLo+0x10(SB)/8, $0x4038333231302b2a
1812DATA expandAVX512_36_outShufLo+0x18(SB)/8, $0x504b4a4948434241
1813DATA expandAVX512_36_outShufLo+0x20(SB)/8, $0x070605045b5a5958
1814DATA expandAVX512_36_outShufLo+0x28(SB)/8, $0x1e1d1c1716151409
1815DATA expandAVX512_36_outShufLo+0x30(SB)/8, $0x35342f2e2d2c211f
1816DATA expandAVX512_36_outShufLo+0x38(SB)/8, $0x4c47464544393736
1817
// expandAVX512_36_outShufHi is the VPERMI2B index vector that builds Z2 in
// expandAVX512_36. Indices 0x00-0x3f pick bytes of Z3 (second pass) and
// 0x40-0x7f pick bytes of Z4 (third pass).
// NOTE(review): declared without the <> suffix, so unlike the inShuf/mat
// tables it is not file-local; confirm that is what the generator intends.
1818GLOBL expandAVX512_36_outShufHi(SB), RODATA, $0x40
1819DATA expandAVX512_36_outShufHi+0x00(SB)/8, $0x3332313028222120
1820DATA expandAVX512_36_outShufHi+0x08(SB)/8, $0x4a4948403b3a3938
1821DATA expandAVX512_36_outShufHi+0x10(SB)/8, $0x616058535251504b
1822DATA expandAVX512_36_outShufHi+0x18(SB)/8, $0x78706b6a69686362
1823DATA expandAVX512_36_outShufHi+0x20(SB)/8, $0x29262524237b7a79
1824DATA expandAVX512_36_outShufHi+0x28(SB)/8, $0x3f3e3d3c37363534
1825DATA expandAVX512_36_outShufHi+0x30(SB)/8, $0x5655544f4e4d4c41
1826DATA expandAVX512_36_outShufHi+0x38(SB)/8, $0x6d6c676665645957
1827
// expandAVX512_36 reads 64 bytes from the address in AX and leaves 128 result
// bytes in Z1 (built with outShufLo) and Z2 (built with outShufHi). Judging by
// the tables it references, each input bit is presumably widened to 36
// consecutive output bits. It has no stack frame and overwrites Z0-Z5.
//
// Three intermediate vectors are produced the same way: VPERMB gathers input
// bytes in the order given by an inShuf table, then VGF2P8AFFINEQB applies the
// matching per-lane bit matrix. VPERMI2B then picks the result bytes from
// pairs of intermediates.
TEXT expandAVX512_36<>(SB), NOSPLIT, $0-0
	// Input bytes.
	VMOVDQU64 (AX), Z5

	// Pass 0 -> Z0.
	VMOVDQU64 expandAVX512_36_inShuf0<>(SB), Z0
	VPERMB Z5, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_36_mat0<>(SB), Z0, Z0

	// Pass 1 -> Z3.
	VMOVDQU64 expandAVX512_36_inShuf1<>(SB), Z3
	VPERMB Z5, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_36_mat1<>(SB), Z3, Z3

	// Pass 2 -> Z4.
	VMOVDQU64 expandAVX512_36_inShuf2<>(SB), Z4
	VPERMB Z5, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_36_mat2<>(SB), Z4, Z4

	// Z1: indices 0x00-0x3f select from Z0, 0x40-0x7f from Z3.
	VMOVDQU64 expandAVX512_36_outShufLo(SB), Z1
	VPERMI2B Z3, Z0, Z1

	// Z2: indices 0x00-0x3f select from Z3, 0x40-0x7f from Z4.
	VMOVDQU64 expandAVX512_36_outShufHi(SB), Z2
	VPERMI2B Z4, Z3, Z2
	RET
1844
// expandAVX512_40_inShuf0 is the VPERMB index vector loaded into Z0 by
// expandAVX512_40: each byte names which of the 64 input bytes is copied to
// that position (input bytes 0 and 1 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
1845GLOBL expandAVX512_40_inShuf0<>(SB), RODATA, $0x40
1846DATA expandAVX512_40_inShuf0<>+0x00(SB)/8, $0x0101010000000000
1847DATA expandAVX512_40_inShuf0<>+0x08(SB)/8, $0x0101010000000000
1848DATA expandAVX512_40_inShuf0<>+0x10(SB)/8, $0x0101010000000000
1849DATA expandAVX512_40_inShuf0<>+0x18(SB)/8, $0x0101010000000000
1850DATA expandAVX512_40_inShuf0<>+0x20(SB)/8, $0x0101010000000000
1851DATA expandAVX512_40_inShuf0<>+0x28(SB)/8, $0xffffff0000000000
1852DATA expandAVX512_40_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
1853DATA expandAVX512_40_inShuf0<>+0x38(SB)/8, $0xffffff0000000000
1854
// expandAVX512_40_mat0 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf0 gather in expandAVX512_40. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise; lanes 0-7 here test bits 0-7 in order.
1855GLOBL expandAVX512_40_mat0<>(SB), RODATA, $0x40
1856DATA expandAVX512_40_mat0<>+0x00(SB)/8, $0x0101010101010101
1857DATA expandAVX512_40_mat0<>+0x08(SB)/8, $0x0202020202020202
1858DATA expandAVX512_40_mat0<>+0x10(SB)/8, $0x0404040404040404
1859DATA expandAVX512_40_mat0<>+0x18(SB)/8, $0x0808080808080808
1860DATA expandAVX512_40_mat0<>+0x20(SB)/8, $0x1010101010101010
1861DATA expandAVX512_40_mat0<>+0x28(SB)/8, $0x2020202020202020
1862DATA expandAVX512_40_mat0<>+0x30(SB)/8, $0x4040404040404040
1863DATA expandAVX512_40_mat0<>+0x38(SB)/8, $0x8080808080808080
1864
// expandAVX512_40_inShuf1 is the VPERMB index vector loaded into Z2 by
// expandAVX512_40: each byte names which of the 64 input bytes is copied to
// that position (input bytes 1 and 2 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
1865GLOBL expandAVX512_40_inShuf1<>(SB), RODATA, $0x40
1866DATA expandAVX512_40_inShuf1<>+0x00(SB)/8, $0xffffffffffff0101
1867DATA expandAVX512_40_inShuf1<>+0x08(SB)/8, $0xffffffffffff0101
1868DATA expandAVX512_40_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
1869DATA expandAVX512_40_inShuf1<>+0x18(SB)/8, $0xffffffffffff0101
1870DATA expandAVX512_40_inShuf1<>+0x20(SB)/8, $0xffffffffffffff01
1871DATA expandAVX512_40_inShuf1<>+0x28(SB)/8, $0xffff020202020201
1872DATA expandAVX512_40_inShuf1<>+0x30(SB)/8, $0x0202020101010101
1873DATA expandAVX512_40_inShuf1<>+0x38(SB)/8, $0x0202020101010101
1874
// expandAVX512_40_mat1 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf1 gather in expandAVX512_40. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. Lanes 4 and 5 both test bit 4 (0x10); the inShuf1 table
// feeds them different input bytes.
1875GLOBL expandAVX512_40_mat1<>(SB), RODATA, $0x40
1876DATA expandAVX512_40_mat1<>+0x00(SB)/8, $0x0101010101010101
1877DATA expandAVX512_40_mat1<>+0x08(SB)/8, $0x0202020202020202
1878DATA expandAVX512_40_mat1<>+0x10(SB)/8, $0x0404040404040404
1879DATA expandAVX512_40_mat1<>+0x18(SB)/8, $0x0808080808080808
1880DATA expandAVX512_40_mat1<>+0x20(SB)/8, $0x1010101010101010
1881DATA expandAVX512_40_mat1<>+0x28(SB)/8, $0x1010101010101010
1882DATA expandAVX512_40_mat1<>+0x30(SB)/8, $0x2020202020202020
1883DATA expandAVX512_40_mat1<>+0x38(SB)/8, $0x4040404040404040
1884
// expandAVX512_40_inShuf2 is the VPERMB index vector loaded into Z3 by
// expandAVX512_40: each byte names which of the 64 input bytes is copied to
// that position (input bytes 1, 2 and 3 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
1885GLOBL expandAVX512_40_inShuf2<>(SB), RODATA, $0x40
1886DATA expandAVX512_40_inShuf2<>+0x00(SB)/8, $0x0202020101010101
1887DATA expandAVX512_40_inShuf2<>+0x08(SB)/8, $0x0303030202020202
1888DATA expandAVX512_40_inShuf2<>+0x10(SB)/8, $0x0303030202020202
1889DATA expandAVX512_40_inShuf2<>+0x18(SB)/8, $0xffffff0202020202
1890DATA expandAVX512_40_inShuf2<>+0x20(SB)/8, $0xffffff0202020202
1891DATA expandAVX512_40_inShuf2<>+0x28(SB)/8, $0xffffffffffff0202
1892DATA expandAVX512_40_inShuf2<>+0x30(SB)/8, $0xffffffffffff0202
1893DATA expandAVX512_40_inShuf2<>+0x38(SB)/8, $0xffffffffffff0202
1894
// expandAVX512_40_mat2 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf2 gather in expandAVX512_40. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. The sequence starts at bit 7 and has no 0x10 lane; bit 4
// of input byte 2 is already tested by lane 5 of the mat1/inShuf1 pass.
1895GLOBL expandAVX512_40_mat2<>(SB), RODATA, $0x40
1896DATA expandAVX512_40_mat2<>+0x00(SB)/8, $0x8080808080808080
1897DATA expandAVX512_40_mat2<>+0x08(SB)/8, $0x0101010101010101
1898DATA expandAVX512_40_mat2<>+0x10(SB)/8, $0x0202020202020202
1899DATA expandAVX512_40_mat2<>+0x18(SB)/8, $0x0404040404040404
1900DATA expandAVX512_40_mat2<>+0x20(SB)/8, $0x0808080808080808
1901DATA expandAVX512_40_mat2<>+0x28(SB)/8, $0x2020202020202020
1902DATA expandAVX512_40_mat2<>+0x30(SB)/8, $0x4040404040404040
1903DATA expandAVX512_40_mat2<>+0x38(SB)/8, $0x8080808080808080
1904
// expandAVX512_40_inShuf3 is the VPERMB index vector loaded into Z4 by
// expandAVX512_40. Only its first two bytes name a real input byte (byte 3);
// every other entry is 0xff, which VPERMB (low 6 index bits only) resolves to
// input byte 63 and which looks like a don't-care position.
1905GLOBL expandAVX512_40_inShuf3<>(SB), RODATA, $0x40
1906DATA expandAVX512_40_inShuf3<>+0x00(SB)/8, $0xffffffffffff0303
1907DATA expandAVX512_40_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
1908DATA expandAVX512_40_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
1909DATA expandAVX512_40_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
1910DATA expandAVX512_40_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
1911DATA expandAVX512_40_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
1912DATA expandAVX512_40_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1913DATA expandAVX512_40_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1914
// expandAVX512_40_mat3 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf3 gather in expandAVX512_40. Lane 0
// maps each source byte to 0xff or 0x00 according to its bit 0; the remaining
// lanes are all-zero matrices, so their result bytes are 0.
1915GLOBL expandAVX512_40_mat3<>(SB), RODATA, $0x40
1916DATA expandAVX512_40_mat3<>+0x00(SB)/8, $0x0101010101010101
1917DATA expandAVX512_40_mat3<>+0x08(SB)/8, $0x0000000000000000
1918DATA expandAVX512_40_mat3<>+0x10(SB)/8, $0x0000000000000000
1919DATA expandAVX512_40_mat3<>+0x18(SB)/8, $0x0000000000000000
1920DATA expandAVX512_40_mat3<>+0x20(SB)/8, $0x0000000000000000
1921DATA expandAVX512_40_mat3<>+0x28(SB)/8, $0x0000000000000000
1922DATA expandAVX512_40_mat3<>+0x30(SB)/8, $0x0000000000000000
1923DATA expandAVX512_40_mat3<>+0x38(SB)/8, $0x0000000000000000
1924
// expandAVX512_40_outShufLo is the VPERMI2B index vector that builds Z1 in
// expandAVX512_40. Indices 0x00-0x3f pick bytes of Z0 (first pass) and
// 0x40-0x7f pick bytes of Z2 (second pass).
// NOTE(review): declared without the <> suffix, so unlike the inShuf/mat
// tables it is not file-local; confirm that is what the generator intends.
1925GLOBL expandAVX512_40_outShufLo(SB), RODATA, $0x40
1926DATA expandAVX512_40_outShufLo+0x00(SB)/8, $0x0a09080403020100
1927DATA expandAVX512_40_outShufLo+0x08(SB)/8, $0x1814131211100c0b
1928DATA expandAVX512_40_outShufLo+0x10(SB)/8, $0x232221201c1b1a19
1929DATA expandAVX512_40_outShufLo+0x18(SB)/8, $0x31302c2b2a292824
1930DATA expandAVX512_40_outShufLo+0x20(SB)/8, $0x3c3b3a3938343332
1931DATA expandAVX512_40_outShufLo+0x28(SB)/8, $0x0f0e0d4140070605
1932DATA expandAVX512_40_outShufLo+0x30(SB)/8, $0x1d51501716154948
1933DATA expandAVX512_40_outShufLo+0x38(SB)/8, $0x6027262559581f1e
1934
// expandAVX512_40_outShufHi0 is the index vector for the zero-masked VPERMI2B
// in expandAVX512_40: 0x00-0x3f pick bytes of Z2 (second pass), 0x40-0x7f pick
// bytes of Z3 (third pass). Its two 0xff entries sit at byte positions 59 and
// 60, exactly the positions cleared by the write mask 0xe7ffffffffffffff; the
// routine fills those from outShufHi1 and ORs the halves together.
// NOTE(review): declared without the <> suffix, so it is not file-local.
1935GLOBL expandAVX512_40_outShufHi0(SB), RODATA, $0x40
1936DATA expandAVX512_40_outShufHi0+0x00(SB)/8, $0x3938343332313028
1937DATA expandAVX512_40_outShufHi0+0x08(SB)/8, $0x44434241403c3b3a
1938DATA expandAVX512_40_outShufHi0+0x10(SB)/8, $0x5251504c4b4a4948
1939DATA expandAVX512_40_outShufHi0+0x18(SB)/8, $0x605c5b5a59585453
1940DATA expandAVX512_40_outShufHi0+0x20(SB)/8, $0x2c2b2a2964636261
1941DATA expandAVX512_40_outShufHi0+0x28(SB)/8, $0x3e3d69683736352d
1942DATA expandAVX512_40_outShufHi0+0x30(SB)/8, $0x797847464571703f
1943DATA expandAVX512_40_outShufHi0+0x38(SB)/8, $0x575655ffff4f4e4d
1944
// expandAVX512_40_outShufHi1 is the index vector for the zero-masked VPERMB
// over Z4 (fourth pass) in expandAVX512_40. Only byte positions 59 and 60
// carry real indices (0x00 and 0x01), matching the write mask
// 0x1800000000000000; all other entries are 0xff and are masked to zero.
// NOTE(review): declared without the <> suffix, so it is not file-local.
1945GLOBL expandAVX512_40_outShufHi1(SB), RODATA, $0x40
1946DATA expandAVX512_40_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1947DATA expandAVX512_40_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1948DATA expandAVX512_40_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
1949DATA expandAVX512_40_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
1950DATA expandAVX512_40_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1951DATA expandAVX512_40_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
1952DATA expandAVX512_40_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
1953DATA expandAVX512_40_outShufHi1+0x38(SB)/8, $0xffffff0100ffffff
1954
// expandAVX512_40 reads 64 bytes from the address in AX and leaves 128 result
// bytes in Z1 (built with outShufLo) and Z2 (built with outShufHi0/outShufHi1).
// Going by its tables, every input bit becomes 40 consecutive output bits,
// i.e. five whole result bytes. It has no stack frame and overwrites Z0-Z7,
// K1 and AX (AX is reused for the mask constants after the input is loaded).
//
// Four intermediate vectors are produced the same way: VPERMB gathers input
// bytes in the order given by an inShuf table, then VGF2P8AFFINEQB applies the
// matching per-lane bit matrix. Z2 needs bytes from three intermediates, so it
// is assembled from two zero-masked permutes with complementary masks.
TEXT expandAVX512_40<>(SB), NOSPLIT, $0-0
	// Input bytes; must be loaded before AX is reused below.
	VMOVDQU64 (AX), Z7

	// Pass 0 -> Z0.
	VMOVDQU64 expandAVX512_40_inShuf0<>(SB), Z0
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_40_mat0<>(SB), Z0, Z0

	// Pass 1 -> Z2.
	VMOVDQU64 expandAVX512_40_inShuf1<>(SB), Z2
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_40_mat1<>(SB), Z2, Z2

	// Pass 2 -> Z3.
	VMOVDQU64 expandAVX512_40_inShuf2<>(SB), Z3
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_40_mat2<>(SB), Z3, Z3

	// Pass 3 -> Z4.
	VMOVDQU64 expandAVX512_40_inShuf3<>(SB), Z4
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_40_mat3<>(SB), Z4, Z4

	// Z1: indices 0x00-0x3f select from Z0, 0x40-0x7f from Z2.
	VMOVDQU64 expandAVX512_40_outShufLo(SB), Z1
	VPERMI2B Z2, Z0, Z1

	// Upper half, part 1 -> Z5: bytes taken from Z2/Z3; positions 59 and 60
	// are zeroed by the mask.
	VMOVDQU64 expandAVX512_40_outShufHi0(SB), Z5
	MOVQ $0xe7ffffffffffffff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5

	// Upper half, part 2 -> Z0: only positions 59 and 60, taken from Z4.
	VMOVDQU64 expandAVX512_40_outShufHi1(SB), Z6
	MOVQ $0x1800000000000000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0

	// The two parts cover disjoint byte positions, so OR merges them.
	VPORQ Z0, Z5, Z2
	RET
1981
// expandAVX512_44_inShuf0 is the VPERMB index vector loaded into Z0 by
// expandAVX512_44: each byte names which of the 64 input bytes is copied to
// that position (input bytes 0 and 1 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
1982GLOBL expandAVX512_44_inShuf0<>(SB), RODATA, $0x40
1983DATA expandAVX512_44_inShuf0<>+0x00(SB)/8, $0x0101010000000000
1984DATA expandAVX512_44_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
1985DATA expandAVX512_44_inShuf0<>+0x10(SB)/8, $0x0101010000000000
1986DATA expandAVX512_44_inShuf0<>+0x18(SB)/8, $0x0101010000000000
1987DATA expandAVX512_44_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
1988DATA expandAVX512_44_inShuf0<>+0x28(SB)/8, $0x0101010000000000
1989DATA expandAVX512_44_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
1990DATA expandAVX512_44_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
1991
// expandAVX512_44_mat0 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf0 gather in expandAVX512_44. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. A split lane such as 0x0101010102020202 fills the low
// nibble of each result byte from bit 0 and the high nibble from bit 1.
1992GLOBL expandAVX512_44_mat0<>(SB), RODATA, $0x40
1993DATA expandAVX512_44_mat0<>+0x00(SB)/8, $0x0101010101010101
1994DATA expandAVX512_44_mat0<>+0x08(SB)/8, $0x0101010102020202
1995DATA expandAVX512_44_mat0<>+0x10(SB)/8, $0x0202020202020202
1996DATA expandAVX512_44_mat0<>+0x18(SB)/8, $0x0404040404040404
1997DATA expandAVX512_44_mat0<>+0x20(SB)/8, $0x0404040408080808
1998DATA expandAVX512_44_mat0<>+0x28(SB)/8, $0x0808080808080808
1999DATA expandAVX512_44_mat0<>+0x30(SB)/8, $0x1010101010101010
2000DATA expandAVX512_44_mat0<>+0x38(SB)/8, $0x1010101020202020
2001
// expandAVX512_44_inShuf1 is the VPERMB index vector loaded into Z2 by
// expandAVX512_44: each byte names which of the 64 input bytes is copied to
// that position (input bytes 0, 1 and 2 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
2002GLOBL expandAVX512_44_inShuf1<>(SB), RODATA, $0x40
2003DATA expandAVX512_44_inShuf1<>+0x00(SB)/8, $0xffffff0000000000
2004DATA expandAVX512_44_inShuf1<>+0x08(SB)/8, $0xffffff0000000000
2005DATA expandAVX512_44_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
2006DATA expandAVX512_44_inShuf1<>+0x18(SB)/8, $0xffffff0000000000
2007DATA expandAVX512_44_inShuf1<>+0x20(SB)/8, $0xffffffffffff0101
2008DATA expandAVX512_44_inShuf1<>+0x28(SB)/8, $0xffffffffffff0101
2009DATA expandAVX512_44_inShuf1<>+0x30(SB)/8, $0xffffffffffff0101
2010DATA expandAVX512_44_inShuf1<>+0x38(SB)/8, $0xff02020202020101
2011
// expandAVX512_44_mat1 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf1 gather in expandAVX512_44. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. The split lane 0x4040404080808080 fills the low nibble of
// each result byte from bit 6 and the high nibble from bit 7.
2012GLOBL expandAVX512_44_mat1<>(SB), RODATA, $0x40
2013DATA expandAVX512_44_mat1<>+0x00(SB)/8, $0x2020202020202020
2014DATA expandAVX512_44_mat1<>+0x08(SB)/8, $0x4040404040404040
2015DATA expandAVX512_44_mat1<>+0x10(SB)/8, $0x4040404080808080
2016DATA expandAVX512_44_mat1<>+0x18(SB)/8, $0x8080808080808080
2017DATA expandAVX512_44_mat1<>+0x20(SB)/8, $0x0101010101010101
2018DATA expandAVX512_44_mat1<>+0x28(SB)/8, $0x0202020202020202
2019DATA expandAVX512_44_mat1<>+0x30(SB)/8, $0x0404040404040404
2020DATA expandAVX512_44_mat1<>+0x38(SB)/8, $0x0808080808080808
2021
// expandAVX512_44_inShuf2 is the VPERMB index vector loaded into Z3 by
// expandAVX512_44: each byte names which of the 64 input bytes is copied to
// that position (input bytes 1 and 2 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
2022GLOBL expandAVX512_44_inShuf2<>(SB), RODATA, $0x40
2023DATA expandAVX512_44_inShuf2<>+0x00(SB)/8, $0x0202020101010101
2024DATA expandAVX512_44_inShuf2<>+0x08(SB)/8, $0xffffffffffff0201
2025DATA expandAVX512_44_inShuf2<>+0x10(SB)/8, $0x0202020101010101
2026DATA expandAVX512_44_inShuf2<>+0x18(SB)/8, $0x0202020101010101
2027DATA expandAVX512_44_inShuf2<>+0x20(SB)/8, $0xffffffffffff0201
2028DATA expandAVX512_44_inShuf2<>+0x28(SB)/8, $0xffff020101010101
2029DATA expandAVX512_44_inShuf2<>+0x30(SB)/8, $0xffffff0202020202
2030DATA expandAVX512_44_inShuf2<>+0x38(SB)/8, $0xffffffffffffff02
2031
// expandAVX512_44_mat2 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf2 gather in expandAVX512_44. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. A split lane such as 0x1010101020202020 fills the low
// nibble of each result byte from bit 4 and the high nibble from bit 5.
2032GLOBL expandAVX512_44_mat2<>(SB), RODATA, $0x40
2033DATA expandAVX512_44_mat2<>+0x00(SB)/8, $0x1010101010101010
2034DATA expandAVX512_44_mat2<>+0x08(SB)/8, $0x1010101020202020
2035DATA expandAVX512_44_mat2<>+0x10(SB)/8, $0x2020202020202020
2036DATA expandAVX512_44_mat2<>+0x18(SB)/8, $0x4040404040404040
2037DATA expandAVX512_44_mat2<>+0x20(SB)/8, $0x4040404080808080
2038DATA expandAVX512_44_mat2<>+0x28(SB)/8, $0x8080808080808080
2039DATA expandAVX512_44_mat2<>+0x30(SB)/8, $0x0101010101010101
2040DATA expandAVX512_44_mat2<>+0x38(SB)/8, $0x0101010102020202
2041
// expandAVX512_44_inShuf3 is the VPERMB index vector loaded into Z4 by
// expandAVX512_44: each byte names which of the 64 input bytes is copied to
// that position (only input byte 2 here). VPERMB looks only at the low 6 bits
// of an index, so a 0xff entry still fetches input byte 63; these look like
// don't-care positions. The last two lanes are entirely 0xff.
2042GLOBL expandAVX512_44_inShuf3<>(SB), RODATA, $0x40
2043DATA expandAVX512_44_inShuf3<>+0x00(SB)/8, $0xffffff0202020202
2044DATA expandAVX512_44_inShuf3<>+0x08(SB)/8, $0xffffff0202020202
2045DATA expandAVX512_44_inShuf3<>+0x10(SB)/8, $0xffffffffffffff02
2046DATA expandAVX512_44_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
2047DATA expandAVX512_44_inShuf3<>+0x20(SB)/8, $0xffffffffffff0202
2048DATA expandAVX512_44_inShuf3<>+0x28(SB)/8, $0xffffffffffff0202
2049DATA expandAVX512_44_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
2050DATA expandAVX512_44_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
2051
// expandAVX512_44_mat3 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf3 gather in expandAVX512_44. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise; the split lane 0x0404040408080808 takes the low nibble
// from bit 2 and the high nibble from bit 3. The last two lanes are all-zero
// matrices, so their result bytes are 0.
2052GLOBL expandAVX512_44_mat3<>(SB), RODATA, $0x40
2053DATA expandAVX512_44_mat3<>+0x00(SB)/8, $0x0202020202020202
2054DATA expandAVX512_44_mat3<>+0x08(SB)/8, $0x0404040404040404
2055DATA expandAVX512_44_mat3<>+0x10(SB)/8, $0x0404040408080808
2056DATA expandAVX512_44_mat3<>+0x18(SB)/8, $0x1010101010101010
2057DATA expandAVX512_44_mat3<>+0x20(SB)/8, $0x2020202020202020
2058DATA expandAVX512_44_mat3<>+0x28(SB)/8, $0x4040404040404040
2059DATA expandAVX512_44_mat3<>+0x30(SB)/8, $0x0000000000000000
2060DATA expandAVX512_44_mat3<>+0x38(SB)/8, $0x0000000000000000
2061
// expandAVX512_44_outShufLo is the VPERMI2B index vector that builds Z1 in
// expandAVX512_44. Indices 0x00-0x3f pick bytes of Z0 (first pass) and
// 0x40-0x7f pick bytes of Z2 (second pass).
// NOTE(review): declared without the <> suffix, so unlike the inShuf/mat
// tables it is not file-local; confirm that is what the generator intends.
2062GLOBL expandAVX512_44_outShufLo(SB), RODATA, $0x40
2063DATA expandAVX512_44_outShufLo+0x00(SB)/8, $0x1110080403020100
2064DATA expandAVX512_44_outShufLo+0x08(SB)/8, $0x1c1b1a1918141312
2065DATA expandAVX512_44_outShufLo+0x10(SB)/8, $0x31302c2b2a292820
2066DATA expandAVX512_44_outShufLo+0x18(SB)/8, $0x4342414038343332
2067DATA expandAVX512_44_outShufLo+0x20(SB)/8, $0x58504c4b4a494844
2068DATA expandAVX512_44_outShufLo+0x28(SB)/8, $0x600706055c5b5a59
2069DATA expandAVX512_44_outShufLo+0x30(SB)/8, $0x1d69681716150961
2070DATA expandAVX512_44_outShufLo+0x38(SB)/8, $0x2f2e2d2171701f1e
2071
// expandAVX512_44_outShufHi0 is the index vector for the zero-masked VPERMI2B
// in expandAVX512_44: 0x00-0x3f pick bytes of Z2 (second pass), 0x40-0x7f pick
// bytes of Z3 (third pass). Its 0xff entries sit exactly at the byte positions
// cleared by the write mask 0xce79fe003fffffff; the routine fills those from
// outShufHi1 and ORs the halves together.
// NOTE(review): declared without the <> suffix, so it is not file-local.
2072GLOBL expandAVX512_44_outShufHi0(SB), RODATA, $0x40
2073DATA expandAVX512_44_outShufHi0+0x00(SB)/8, $0x4844434241403938
2074DATA expandAVX512_44_outShufHi0+0x08(SB)/8, $0x5a59585453525150
2075DATA expandAVX512_44_outShufHi0+0x10(SB)/8, $0x6c6b6a6968605c5b
2076DATA expandAVX512_44_outShufHi0+0x18(SB)/8, $0xffff787473727170
2077DATA expandAVX512_44_outShufHi0+0x20(SB)/8, $0xffffffffffffffff
2078DATA expandAVX512_44_outShufHi0+0x28(SB)/8, $0x46453e3d3c3b3aff
2079DATA expandAVX512_44_outShufHi0+0x30(SB)/8, $0xff57565549ffff47
2080DATA expandAVX512_44_outShufHi0+0x38(SB)/8, $0x6d61ffff5f5e5dff
2081
// expandAVX512_44_outShufHi1 is the index vector for the zero-masked VPERMB
// over Z4 (fourth pass) in expandAVX512_44. Its non-0xff entries sit exactly
// at the byte positions enabled by the write mask 0x318601ffc0000000, the
// complement of the mask used with outShufHi0; the 0xff entries are masked to
// zero.
// NOTE(review): declared without the <> suffix, so it is not file-local.
2082GLOBL expandAVX512_44_outShufHi1(SB), RODATA, $0x40
2083DATA expandAVX512_44_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
2084DATA expandAVX512_44_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
2085DATA expandAVX512_44_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
2086DATA expandAVX512_44_outShufHi1+0x18(SB)/8, $0x0100ffffffffffff
2087DATA expandAVX512_44_outShufHi1+0x20(SB)/8, $0x0c0b0a0908040302
2088DATA expandAVX512_44_outShufHi1+0x28(SB)/8, $0xffffffffffffff10
2089DATA expandAVX512_44_outShufHi1+0x30(SB)/8, $0x20ffffffff1918ff
2090DATA expandAVX512_44_outShufHi1+0x38(SB)/8, $0xffff2928ffffff21
2091
// expandAVX512_44 reads 64 bytes from the address in AX and leaves 128 result
// bytes in Z1 (built with outShufLo) and Z2 (built with outShufHi0/outShufHi1).
// Going by its tables, every input bit becomes 44 consecutive output bits
// (five and a half result bytes, hence the split-nibble matrix lanes). It has
// no stack frame and overwrites Z0-Z7, K1 and AX (AX is reused for the mask
// constants after the input is loaded).
//
// Four intermediate vectors are produced the same way: VPERMB gathers input
// bytes in the order given by an inShuf table, then VGF2P8AFFINEQB applies the
// matching per-lane bit matrix. Z2 needs bytes from three intermediates, so it
// is assembled from two zero-masked permutes with complementary masks.
TEXT expandAVX512_44<>(SB), NOSPLIT, $0-0
	// Input bytes; must be loaded before AX is reused below.
	VMOVDQU64 (AX), Z7

	// Pass 0 -> Z0.
	VMOVDQU64 expandAVX512_44_inShuf0<>(SB), Z0
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_44_mat0<>(SB), Z0, Z0

	// Pass 1 -> Z2.
	VMOVDQU64 expandAVX512_44_inShuf1<>(SB), Z2
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_44_mat1<>(SB), Z2, Z2

	// Pass 2 -> Z3.
	VMOVDQU64 expandAVX512_44_inShuf2<>(SB), Z3
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_44_mat2<>(SB), Z3, Z3

	// Pass 3 -> Z4.
	VMOVDQU64 expandAVX512_44_inShuf3<>(SB), Z4
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_44_mat3<>(SB), Z4, Z4

	// Z1: indices 0x00-0x3f select from Z0, 0x40-0x7f from Z2.
	VMOVDQU64 expandAVX512_44_outShufLo(SB), Z1
	VPERMI2B Z2, Z0, Z1

	// Upper half, part 1 -> Z5: bytes taken from Z2/Z3, zero where masked off.
	VMOVDQU64 expandAVX512_44_outShufHi0(SB), Z5
	MOVQ $0xce79fe003fffffff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5

	// Upper half, part 2 -> Z0: the complementary positions, taken from Z4.
	VMOVDQU64 expandAVX512_44_outShufHi1(SB), Z6
	MOVQ $0x318601ffc0000000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0

	// The two parts cover disjoint byte positions, so OR merges them.
	VPORQ Z0, Z5, Z2
	RET
2118
// expandAVX512_48_inShuf0 is the VPERMB index vector loaded into Z0 by
// expandAVX512_48: each byte names which of the 64 input bytes is copied to
// that position (input bytes 0 and 1 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
2119GLOBL expandAVX512_48_inShuf0<>(SB), RODATA, $0x40
2120DATA expandAVX512_48_inShuf0<>+0x00(SB)/8, $0x0101000000000000
2121DATA expandAVX512_48_inShuf0<>+0x08(SB)/8, $0x0101000000000000
2122DATA expandAVX512_48_inShuf0<>+0x10(SB)/8, $0x0101000000000000
2123DATA expandAVX512_48_inShuf0<>+0x18(SB)/8, $0xffff000000000000
2124DATA expandAVX512_48_inShuf0<>+0x20(SB)/8, $0xffff000000000000
2125DATA expandAVX512_48_inShuf0<>+0x28(SB)/8, $0xffff000000000000
2126DATA expandAVX512_48_inShuf0<>+0x30(SB)/8, $0xffff000000000000
2127DATA expandAVX512_48_inShuf0<>+0x38(SB)/8, $0xffff000000000000
2128
// expandAVX512_48_mat0 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf0 gather in expandAVX512_48. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise; lanes 0-7 here test bits 0-7 in order.
2129GLOBL expandAVX512_48_mat0<>(SB), RODATA, $0x40
2130DATA expandAVX512_48_mat0<>+0x00(SB)/8, $0x0101010101010101
2131DATA expandAVX512_48_mat0<>+0x08(SB)/8, $0x0202020202020202
2132DATA expandAVX512_48_mat0<>+0x10(SB)/8, $0x0404040404040404
2133DATA expandAVX512_48_mat0<>+0x18(SB)/8, $0x0808080808080808
2134DATA expandAVX512_48_mat0<>+0x20(SB)/8, $0x1010101010101010
2135DATA expandAVX512_48_mat0<>+0x28(SB)/8, $0x2020202020202020
2136DATA expandAVX512_48_mat0<>+0x30(SB)/8, $0x4040404040404040
2137DATA expandAVX512_48_mat0<>+0x38(SB)/8, $0x8080808080808080
2138
// expandAVX512_48_inShuf1 is the VPERMB index vector loaded into Z3 by
// expandAVX512_48: each byte names which of the 64 input bytes is copied to
// that position (input bytes 1 and 2 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
2139GLOBL expandAVX512_48_inShuf1<>(SB), RODATA, $0x40
2140DATA expandAVX512_48_inShuf1<>+0x00(SB)/8, $0xffffffff01010101
2141DATA expandAVX512_48_inShuf1<>+0x08(SB)/8, $0xffffffff01010101
2142DATA expandAVX512_48_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
2143DATA expandAVX512_48_inShuf1<>+0x18(SB)/8, $0x0202020202020101
2144DATA expandAVX512_48_inShuf1<>+0x20(SB)/8, $0x0202010101010101
2145DATA expandAVX512_48_inShuf1<>+0x28(SB)/8, $0x0202010101010101
2146DATA expandAVX512_48_inShuf1<>+0x30(SB)/8, $0x0202010101010101
2147DATA expandAVX512_48_inShuf1<>+0x38(SB)/8, $0xffff010101010101
2148
// expandAVX512_48_mat1 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf1 gather in expandAVX512_48. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. Lanes 2 and 3 both test bit 2 (0x04); the inShuf1 table
// feeds them different mixes of input bytes.
2149GLOBL expandAVX512_48_mat1<>(SB), RODATA, $0x40
2150DATA expandAVX512_48_mat1<>+0x00(SB)/8, $0x0101010101010101
2151DATA expandAVX512_48_mat1<>+0x08(SB)/8, $0x0202020202020202
2152DATA expandAVX512_48_mat1<>+0x10(SB)/8, $0x0404040404040404
2153DATA expandAVX512_48_mat1<>+0x18(SB)/8, $0x0404040404040404
2154DATA expandAVX512_48_mat1<>+0x20(SB)/8, $0x0808080808080808
2155DATA expandAVX512_48_mat1<>+0x28(SB)/8, $0x1010101010101010
2156DATA expandAVX512_48_mat1<>+0x30(SB)/8, $0x2020202020202020
2157DATA expandAVX512_48_mat1<>+0x38(SB)/8, $0x4040404040404040
2158
// expandAVX512_48_inShuf2 is the VPERMB index vector loaded into Z4 by
// expandAVX512_48: each byte names which of the 64 input bytes is copied to
// that position (input bytes 1 and 2 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions. The last three lanes are entirely 0xff.
2159GLOBL expandAVX512_48_inShuf2<>(SB), RODATA, $0x40
2160DATA expandAVX512_48_inShuf2<>+0x00(SB)/8, $0xffff010101010101
2161DATA expandAVX512_48_inShuf2<>+0x08(SB)/8, $0xffff020202020202
2162DATA expandAVX512_48_inShuf2<>+0x10(SB)/8, $0xffff020202020202
2163DATA expandAVX512_48_inShuf2<>+0x18(SB)/8, $0xffffffff02020202
2164DATA expandAVX512_48_inShuf2<>+0x20(SB)/8, $0xffffffff02020202
2165DATA expandAVX512_48_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
2166DATA expandAVX512_48_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
2167DATA expandAVX512_48_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
2168
// expandAVX512_48_mat2 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf2 gather in expandAVX512_48. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. The last three lanes are all-zero matrices, so their
// result bytes are 0.
2169GLOBL expandAVX512_48_mat2<>(SB), RODATA, $0x40
2170DATA expandAVX512_48_mat2<>+0x00(SB)/8, $0x8080808080808080
2171DATA expandAVX512_48_mat2<>+0x08(SB)/8, $0x0101010101010101
2172DATA expandAVX512_48_mat2<>+0x10(SB)/8, $0x0202020202020202
2173DATA expandAVX512_48_mat2<>+0x18(SB)/8, $0x0808080808080808
2174DATA expandAVX512_48_mat2<>+0x20(SB)/8, $0x1010101010101010
2175DATA expandAVX512_48_mat2<>+0x28(SB)/8, $0x0000000000000000
2176DATA expandAVX512_48_mat2<>+0x30(SB)/8, $0x0000000000000000
2177DATA expandAVX512_48_mat2<>+0x38(SB)/8, $0x0000000000000000
2178
// expandAVX512_48_outShufLo is the VPERMI2B index vector that builds Z1 in
// expandAVX512_48. Indices 0x00-0x3f pick bytes of Z0 (first pass) and
// 0x40-0x7f pick bytes of Z3 (second pass).
// NOTE(review): declared without the <> suffix, so unlike the inShuf/mat
// tables it is not file-local; confirm that is what the generator intends.
2179GLOBL expandAVX512_48_outShufLo(SB), RODATA, $0x40
2180DATA expandAVX512_48_outShufLo+0x00(SB)/8, $0x0908050403020100
2181DATA expandAVX512_48_outShufLo+0x08(SB)/8, $0x131211100d0c0b0a
2182DATA expandAVX512_48_outShufLo+0x10(SB)/8, $0x1d1c1b1a19181514
2183DATA expandAVX512_48_outShufLo+0x18(SB)/8, $0x2928252423222120
2184DATA expandAVX512_48_outShufLo+0x20(SB)/8, $0x333231302d2c2b2a
2185DATA expandAVX512_48_outShufLo+0x28(SB)/8, $0x3d3c3b3a39383534
2186DATA expandAVX512_48_outShufLo+0x30(SB)/8, $0x0f0e434241400706
2187DATA expandAVX512_48_outShufLo+0x38(SB)/8, $0x515017164b4a4948
2188
// expandAVX512_48_outShufHi is the VPERMI2B index vector that builds Z2 in
// expandAVX512_48. Indices 0x00-0x3f pick bytes of Z3 (second pass) and
// 0x40-0x7f pick bytes of Z4 (third pass).
// NOTE(review): declared without the <> suffix, so unlike the inShuf/mat
// tables it is not file-local; confirm that is what the generator intends.
2189GLOBL expandAVX512_48_outShufHi(SB), RODATA, $0x40
2190DATA expandAVX512_48_outShufHi+0x00(SB)/8, $0x2524232221201918
2191DATA expandAVX512_48_outShufHi+0x08(SB)/8, $0x31302d2c2b2a2928
2192DATA expandAVX512_48_outShufHi+0x10(SB)/8, $0x3b3a393835343332
2193DATA expandAVX512_48_outShufHi+0x18(SB)/8, $0x4544434241403d3c
2194DATA expandAVX512_48_outShufHi+0x20(SB)/8, $0x51504d4c4b4a4948
2195DATA expandAVX512_48_outShufHi+0x28(SB)/8, $0x1d1c1b1a55545352
2196DATA expandAVX512_48_outShufHi+0x30(SB)/8, $0x5b5a595827261f1e
2197DATA expandAVX512_48_outShufHi+0x38(SB)/8, $0x3736636261602f2e
2198
// expandAVX512_48 reads 64 bytes from the address in AX and leaves 128 result
// bytes in Z1 (built with outShufLo) and Z2 (built with outShufHi). Going by
// its tables, every input bit becomes 48 consecutive output bits, i.e. six
// whole result bytes. It has no stack frame and overwrites Z0-Z5.
//
// Three intermediate vectors are produced the same way: VPERMB gathers input
// bytes in the order given by an inShuf table, then VGF2P8AFFINEQB applies the
// matching per-lane bit matrix. VPERMI2B then picks the result bytes from
// pairs of intermediates.
TEXT expandAVX512_48<>(SB), NOSPLIT, $0-0
	// Input bytes.
	VMOVDQU64 (AX), Z5

	// Pass 0 -> Z0.
	VMOVDQU64 expandAVX512_48_inShuf0<>(SB), Z0
	VPERMB Z5, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_48_mat0<>(SB), Z0, Z0

	// Pass 1 -> Z3.
	VMOVDQU64 expandAVX512_48_inShuf1<>(SB), Z3
	VPERMB Z5, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_48_mat1<>(SB), Z3, Z3

	// Pass 2 -> Z4.
	VMOVDQU64 expandAVX512_48_inShuf2<>(SB), Z4
	VPERMB Z5, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_48_mat2<>(SB), Z4, Z4

	// Z1: indices 0x00-0x3f select from Z0, 0x40-0x7f from Z3.
	VMOVDQU64 expandAVX512_48_outShufLo(SB), Z1
	VPERMI2B Z3, Z0, Z1

	// Z2: indices 0x00-0x3f select from Z3, 0x40-0x7f from Z4.
	VMOVDQU64 expandAVX512_48_outShufHi(SB), Z2
	VPERMI2B Z4, Z3, Z2
	RET
2215
// expandAVX512_52_inShuf0 is the VPERMB index vector loaded into Z0 by
// expandAVX512_52: each byte names which of the 64 input bytes is copied to
// that position (input bytes 0 and 1 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
2216GLOBL expandAVX512_52_inShuf0<>(SB), RODATA, $0x40
2217DATA expandAVX512_52_inShuf0<>+0x00(SB)/8, $0x0101000000000000
2218DATA expandAVX512_52_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
2219DATA expandAVX512_52_inShuf0<>+0x10(SB)/8, $0x0101000000000000
2220DATA expandAVX512_52_inShuf0<>+0x18(SB)/8, $0xffff000000000000
2221DATA expandAVX512_52_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
2222DATA expandAVX512_52_inShuf0<>+0x28(SB)/8, $0xffff000000000000
2223DATA expandAVX512_52_inShuf0<>+0x30(SB)/8, $0xffff000000000000
2224DATA expandAVX512_52_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
2225
// expandAVX512_52_mat0 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf0 gather in expandAVX512_52. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. A split lane such as 0x0101010102020202 fills the low
// nibble of each result byte from bit 0 and the high nibble from bit 1.
2226GLOBL expandAVX512_52_mat0<>(SB), RODATA, $0x40
2227DATA expandAVX512_52_mat0<>+0x00(SB)/8, $0x0101010101010101
2228DATA expandAVX512_52_mat0<>+0x08(SB)/8, $0x0101010102020202
2229DATA expandAVX512_52_mat0<>+0x10(SB)/8, $0x0202020202020202
2230DATA expandAVX512_52_mat0<>+0x18(SB)/8, $0x0404040404040404
2231DATA expandAVX512_52_mat0<>+0x20(SB)/8, $0x0404040408080808
2232DATA expandAVX512_52_mat0<>+0x28(SB)/8, $0x0808080808080808
2233DATA expandAVX512_52_mat0<>+0x30(SB)/8, $0x1010101010101010
2234DATA expandAVX512_52_mat0<>+0x38(SB)/8, $0x1010101020202020
2235
// expandAVX512_52_inShuf1 is the VPERMB index vector loaded into Z2 by
// expandAVX512_52: each byte names which of the 64 input bytes is copied to
// that position (input bytes 0, 1 and 2 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
2236GLOBL expandAVX512_52_inShuf1<>(SB), RODATA, $0x40
2237DATA expandAVX512_52_inShuf1<>+0x00(SB)/8, $0xffff000000000000
2238DATA expandAVX512_52_inShuf1<>+0x08(SB)/8, $0xffff000000000000
2239DATA expandAVX512_52_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
2240DATA expandAVX512_52_inShuf1<>+0x18(SB)/8, $0xffff000000000000
2241DATA expandAVX512_52_inShuf1<>+0x20(SB)/8, $0xffffffff01010101
2242DATA expandAVX512_52_inShuf1<>+0x28(SB)/8, $0xffffffffff010101
2243DATA expandAVX512_52_inShuf1<>+0x30(SB)/8, $0xff02020202020201
2244DATA expandAVX512_52_inShuf1<>+0x38(SB)/8, $0x0202010101010101
2245
// expandAVX512_52_mat1 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf1 gather in expandAVX512_52. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise; the split lane 0x4040404080808080 takes the low nibble
// from bit 6 and the high nibble from bit 7. Lanes 5 and 6 both test bit 1
// (0x02); the inShuf1 table feeds them different input bytes.
2246GLOBL expandAVX512_52_mat1<>(SB), RODATA, $0x40
2247DATA expandAVX512_52_mat1<>+0x00(SB)/8, $0x2020202020202020
2248DATA expandAVX512_52_mat1<>+0x08(SB)/8, $0x4040404040404040
2249DATA expandAVX512_52_mat1<>+0x10(SB)/8, $0x4040404080808080
2250DATA expandAVX512_52_mat1<>+0x18(SB)/8, $0x8080808080808080
2251DATA expandAVX512_52_mat1<>+0x20(SB)/8, $0x0101010101010101
2252DATA expandAVX512_52_mat1<>+0x28(SB)/8, $0x0202020202020202
2253DATA expandAVX512_52_mat1<>+0x30(SB)/8, $0x0202020202020202
2254DATA expandAVX512_52_mat1<>+0x38(SB)/8, $0x0404040404040404
2255
// expandAVX512_52_inShuf2 is the VPERMB index vector loaded into Z3 by
// expandAVX512_52: each byte names which of the 64 input bytes is copied to
// that position (input bytes 1 and 2 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
2256GLOBL expandAVX512_52_inShuf2<>(SB), RODATA, $0x40
2257DATA expandAVX512_52_inShuf2<>+0x00(SB)/8, $0xffffffffffff0201
2258DATA expandAVX512_52_inShuf2<>+0x08(SB)/8, $0x0202010101010101
2259DATA expandAVX512_52_inShuf2<>+0x10(SB)/8, $0xffff010101010101
2260DATA expandAVX512_52_inShuf2<>+0x18(SB)/8, $0xffffffffffffff01
2261DATA expandAVX512_52_inShuf2<>+0x20(SB)/8, $0xffff010101010101
2262DATA expandAVX512_52_inShuf2<>+0x28(SB)/8, $0xffff010101010101
2263DATA expandAVX512_52_inShuf2<>+0x30(SB)/8, $0xffffffffffffff01
2264DATA expandAVX512_52_inShuf2<>+0x38(SB)/8, $0xffff010101010101
2265
// expandAVX512_52_mat2 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf2 gather in expandAVX512_52. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. A split lane such as 0x0404040408080808 fills the low
// nibble of each result byte from bit 2 and the high nibble from bit 3.
2266GLOBL expandAVX512_52_mat2<>(SB), RODATA, $0x40
2267DATA expandAVX512_52_mat2<>+0x00(SB)/8, $0x0404040408080808
2268DATA expandAVX512_52_mat2<>+0x08(SB)/8, $0x0808080808080808
2269DATA expandAVX512_52_mat2<>+0x10(SB)/8, $0x1010101010101010
2270DATA expandAVX512_52_mat2<>+0x18(SB)/8, $0x1010101020202020
2271DATA expandAVX512_52_mat2<>+0x20(SB)/8, $0x2020202020202020
2272DATA expandAVX512_52_mat2<>+0x28(SB)/8, $0x4040404040404040
2273DATA expandAVX512_52_mat2<>+0x30(SB)/8, $0x4040404080808080
2274DATA expandAVX512_52_mat2<>+0x38(SB)/8, $0x8080808080808080
2275
// expandAVX512_52_inShuf3 is the VPERMB index vector loaded into Z4 by
// expandAVX512_52: each byte names which of the 64 input bytes is copied to
// that position (only input byte 2 here). VPERMB looks only at the low 6 bits
// of an index, so a 0xff entry still fetches input byte 63; these look like
// don't-care positions. The last four lanes are entirely 0xff.
2276GLOBL expandAVX512_52_inShuf3<>(SB), RODATA, $0x40
2277DATA expandAVX512_52_inShuf3<>+0x00(SB)/8, $0xffff020202020202
2278DATA expandAVX512_52_inShuf3<>+0x08(SB)/8, $0xffffffffffffff02
2279DATA expandAVX512_52_inShuf3<>+0x10(SB)/8, $0xffffffff02020202
2280DATA expandAVX512_52_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
2281DATA expandAVX512_52_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
2282DATA expandAVX512_52_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
2283DATA expandAVX512_52_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
2284DATA expandAVX512_52_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
2285
// expandAVX512_52_mat3 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf3 gather in expandAVX512_52. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise; the split lane 0x0101010102020202 takes the low nibble
// from bit 0 and the high nibble from bit 1. The last four lanes are all-zero
// matrices, so their result bytes are 0.
2286GLOBL expandAVX512_52_mat3<>(SB), RODATA, $0x40
2287DATA expandAVX512_52_mat3<>+0x00(SB)/8, $0x0101010101010101
2288DATA expandAVX512_52_mat3<>+0x08(SB)/8, $0x0101010102020202
2289DATA expandAVX512_52_mat3<>+0x10(SB)/8, $0x0404040404040404
2290DATA expandAVX512_52_mat3<>+0x18(SB)/8, $0x0808080808080808
2291DATA expandAVX512_52_mat3<>+0x20(SB)/8, $0x0000000000000000
2292DATA expandAVX512_52_mat3<>+0x28(SB)/8, $0x0000000000000000
2293DATA expandAVX512_52_mat3<>+0x30(SB)/8, $0x0000000000000000
2294DATA expandAVX512_52_mat3<>+0x38(SB)/8, $0x0000000000000000
2295
// expandAVX512_52_outShufLo is the VPERMI2B index vector that builds Z1 in
// expandAVX512_52. Indices 0x00-0x3f pick bytes of Z0 (first pass) and
// 0x40-0x7f pick bytes of Z2 (second pass).
// NOTE(review): declared without the <> suffix, so unlike the inShuf/mat
// tables it is not file-local; confirm that is what the generator intends.
2296GLOBL expandAVX512_52_outShufLo(SB), RODATA, $0x40
2297DATA expandAVX512_52_outShufLo+0x00(SB)/8, $0x1008050403020100
2298DATA expandAVX512_52_outShufLo+0x08(SB)/8, $0x1a19181514131211
2299DATA expandAVX512_52_outShufLo+0x10(SB)/8, $0x2b2a2928201d1c1b
2300DATA expandAVX512_52_outShufLo+0x18(SB)/8, $0x3534333231302d2c
2301DATA expandAVX512_52_outShufLo+0x20(SB)/8, $0x4845444342414038
2302DATA expandAVX512_52_outShufLo+0x28(SB)/8, $0x5958504d4c4b4a49
2303DATA expandAVX512_52_outShufLo+0x30(SB)/8, $0x616007065d5c5b5a
2304DATA expandAVX512_52_outShufLo+0x38(SB)/8, $0x6a69681716096362
2305
// expandAVX512_52_outShufHi0 is the index vector for the zero-masked VPERMI2B
// in expandAVX512_52: 0x00-0x3f pick bytes of Z2 (second pass), 0x40-0x7f pick
// bytes of Z3 (third pass). Its 0xff entries sit exactly at the byte positions
// cleared by the write mask 0x387f80ffffffffff; the routine fills those from
// outShufHi1 and ORs the halves together.
// NOTE(review): declared without the <> suffix, so it is not file-local.
2306GLOBL expandAVX512_52_outShufHi0(SB), RODATA, $0x40
2307DATA expandAVX512_52_outShufHi0+0x00(SB)/8, $0x403d3c3b3a393830
2308DATA expandAVX512_52_outShufHi0+0x08(SB)/8, $0x51504d4c4b4a4948
2309DATA expandAVX512_52_outShufHi0+0x10(SB)/8, $0x6261605855545352
2310DATA expandAVX512_52_outShufHi0+0x18(SB)/8, $0x6c6b6a6968656463
2311DATA expandAVX512_52_outShufHi0+0x20(SB)/8, $0x7d7c7b7a7978706d
2312DATA expandAVX512_52_outShufHi0+0x28(SB)/8, $0x31ffffffffffffff
2313DATA expandAVX512_52_outShufHi0+0x30(SB)/8, $0xff3f3e3635343332
2314DATA expandAVX512_52_outShufHi0+0x38(SB)/8, $0xffff4f4e41ffffff
2315
// expandAVX512_52_outShufHi1 is the index vector for the zero-masked VPERMB
// over Z4 (fourth pass) in expandAVX512_52. Its non-0xff entries sit exactly
// at the byte positions enabled by the write mask 0xc7807f0000000000, the
// complement of the mask used with outShufHi0; the 0xff entries are masked to
// zero.
// NOTE(review): declared without the <> suffix, so it is not file-local.
2316GLOBL expandAVX512_52_outShufHi1(SB), RODATA, $0x40
2317DATA expandAVX512_52_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
2318DATA expandAVX512_52_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
2319DATA expandAVX512_52_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
2320DATA expandAVX512_52_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
2321DATA expandAVX512_52_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
2322DATA expandAVX512_52_outShufHi1+0x28(SB)/8, $0xff08050403020100
2323DATA expandAVX512_52_outShufHi1+0x30(SB)/8, $0x10ffffffffffffff
2324DATA expandAVX512_52_outShufHi1+0x38(SB)/8, $0x1918ffffff131211
2325
// expandAVX512_52 reads 64 bytes from the address in AX and leaves 128 result
// bytes in Z1 (built with outShufLo) and Z2 (built with outShufHi0/outShufHi1).
// Going by its tables, every input bit becomes 52 consecutive output bits
// (six and a half result bytes, hence the split-nibble matrix lanes). It has
// no stack frame and overwrites Z0-Z7, K1 and AX (AX is reused for the mask
// constants after the input is loaded).
//
// Four intermediate vectors are produced the same way: VPERMB gathers input
// bytes in the order given by an inShuf table, then VGF2P8AFFINEQB applies the
// matching per-lane bit matrix. Z2 needs bytes from three intermediates, so it
// is assembled from two zero-masked permutes with complementary masks.
TEXT expandAVX512_52<>(SB), NOSPLIT, $0-0
	// Input bytes; must be loaded before AX is reused below.
	VMOVDQU64 (AX), Z7

	// Pass 0 -> Z0.
	VMOVDQU64 expandAVX512_52_inShuf0<>(SB), Z0
	VPERMB Z7, Z0, Z0
	VGF2P8AFFINEQB $0, expandAVX512_52_mat0<>(SB), Z0, Z0

	// Pass 1 -> Z2.
	VMOVDQU64 expandAVX512_52_inShuf1<>(SB), Z2
	VPERMB Z7, Z2, Z2
	VGF2P8AFFINEQB $0, expandAVX512_52_mat1<>(SB), Z2, Z2

	// Pass 2 -> Z3.
	VMOVDQU64 expandAVX512_52_inShuf2<>(SB), Z3
	VPERMB Z7, Z3, Z3
	VGF2P8AFFINEQB $0, expandAVX512_52_mat2<>(SB), Z3, Z3

	// Pass 3 -> Z4.
	VMOVDQU64 expandAVX512_52_inShuf3<>(SB), Z4
	VPERMB Z7, Z4, Z4
	VGF2P8AFFINEQB $0, expandAVX512_52_mat3<>(SB), Z4, Z4

	// Z1: indices 0x00-0x3f select from Z0, 0x40-0x7f from Z2.
	VMOVDQU64 expandAVX512_52_outShufLo(SB), Z1
	VPERMI2B Z2, Z0, Z1

	// Upper half, part 1 -> Z5: bytes taken from Z2/Z3, zero where masked off.
	VMOVDQU64 expandAVX512_52_outShufHi0(SB), Z5
	MOVQ $0x387f80ffffffffff, AX
	KMOVQ AX, K1
	VPERMI2B.Z Z3, Z2, K1, Z5

	// Upper half, part 2 -> Z0: the complementary positions, taken from Z4.
	VMOVDQU64 expandAVX512_52_outShufHi1(SB), Z6
	MOVQ $0xc7807f0000000000, AX
	KMOVQ AX, K1
	VPERMB.Z Z4, Z6, K1, Z0

	// The two parts cover disjoint byte positions, so OR merges them.
	VPORQ Z0, Z5, Z2
	RET
2352
// expandAVX512_56_inShuf0 is the VPERMB index vector loaded into Z0 by
// expandAVX512_56: each byte names which of the 64 input bytes is copied to
// that position (input bytes 0 and 1 here). VPERMB looks only at the low 6
// bits of an index, so a 0xff entry still fetches input byte 63; these look
// like don't-care positions.
2353GLOBL expandAVX512_56_inShuf0<>(SB), RODATA, $0x40
2354DATA expandAVX512_56_inShuf0<>+0x00(SB)/8, $0x0100000000000000
2355DATA expandAVX512_56_inShuf0<>+0x08(SB)/8, $0x0100000000000000
2356DATA expandAVX512_56_inShuf0<>+0x10(SB)/8, $0xff00000000000000
2357DATA expandAVX512_56_inShuf0<>+0x18(SB)/8, $0xff00000000000000
2358DATA expandAVX512_56_inShuf0<>+0x20(SB)/8, $0xff00000000000000
2359DATA expandAVX512_56_inShuf0<>+0x28(SB)/8, $0xff00000000000000
2360DATA expandAVX512_56_inShuf0<>+0x30(SB)/8, $0xff00000000000000
2361DATA expandAVX512_56_inShuf0<>+0x38(SB)/8, $0xff00000000000000
2362
// expandAVX512_56_mat0 holds one 8x8 bit matrix per 64-bit lane. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise; lanes 0-7 here test bits 0-7 in order. expandAVX512_56
// loads this table into Z3 once and uses it as the matrix for both the inShuf0
// and the inShuf1 pass (there is no separate mat1 table).
2363GLOBL expandAVX512_56_mat0<>(SB), RODATA, $0x40
2364DATA expandAVX512_56_mat0<>+0x00(SB)/8, $0x0101010101010101
2365DATA expandAVX512_56_mat0<>+0x08(SB)/8, $0x0202020202020202
2366DATA expandAVX512_56_mat0<>+0x10(SB)/8, $0x0404040404040404
2367DATA expandAVX512_56_mat0<>+0x18(SB)/8, $0x0808080808080808
2368DATA expandAVX512_56_mat0<>+0x20(SB)/8, $0x1010101010101010
2369DATA expandAVX512_56_mat0<>+0x28(SB)/8, $0x2020202020202020
2370DATA expandAVX512_56_mat0<>+0x30(SB)/8, $0x4040404040404040
2371DATA expandAVX512_56_mat0<>+0x38(SB)/8, $0x8080808080808080
2372
// expandAVX512_56_inShuf1 is the VPERMB index vector loaded into Z4 by
// expandAVX512_56 for its second pass: each byte names which of the 64 input
// bytes is copied to that position (input bytes 1 and 2 here). VPERMB looks
// only at the low 6 bits of an index, so a 0xff entry still fetches input byte
// 63; these look like don't-care positions.
2373GLOBL expandAVX512_56_inShuf1<>(SB), RODATA, $0x40
2374DATA expandAVX512_56_inShuf1<>+0x00(SB)/8, $0xffff010101010101
2375DATA expandAVX512_56_inShuf1<>+0x08(SB)/8, $0x0202010101010101
2376DATA expandAVX512_56_inShuf1<>+0x10(SB)/8, $0x0201010101010101
2377DATA expandAVX512_56_inShuf1<>+0x18(SB)/8, $0xff01010101010101
2378DATA expandAVX512_56_inShuf1<>+0x20(SB)/8, $0xff01010101010101
2379DATA expandAVX512_56_inShuf1<>+0x28(SB)/8, $0xff01010101010101
2380DATA expandAVX512_56_inShuf1<>+0x30(SB)/8, $0xff01010101010101
2381DATA expandAVX512_56_inShuf1<>+0x38(SB)/8, $0xff01010101010101
2382
// expandAVX512_56_inShuf2 is the VPERMB index vector loaded into Z5 by
// expandAVX512_56 for its third pass: each byte names which of the 64 input
// bytes is copied to that position (only input byte 2 here). VPERMB looks only
// at the low 6 bits of an index, so a 0xff entry still fetches input byte 63;
// these look like don't-care positions. The last five lanes are entirely 0xff.
2383GLOBL expandAVX512_56_inShuf2<>(SB), RODATA, $0x40
2384DATA expandAVX512_56_inShuf2<>+0x00(SB)/8, $0xff02020202020202
2385DATA expandAVX512_56_inShuf2<>+0x08(SB)/8, $0xffffff0202020202
2386DATA expandAVX512_56_inShuf2<>+0x10(SB)/8, $0xffffffffffffff02
2387DATA expandAVX512_56_inShuf2<>+0x18(SB)/8, $0xffffffffffffffff
2388DATA expandAVX512_56_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
2389DATA expandAVX512_56_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
2390DATA expandAVX512_56_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
2391DATA expandAVX512_56_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
2392
// expandAVX512_56_mat2 supplies one 8x8 bit matrix per 64-bit lane to the
// VGF2P8AFFINEQB that follows the inShuf2 gather in expandAVX512_56. A lane of
// eight equal bytes 1<<k maps each source byte to 0xff if its bit k is set and
// to 0x00 otherwise. The last five lanes are all-zero matrices, so their
// result bytes are 0.
2393GLOBL expandAVX512_56_mat2<>(SB), RODATA, $0x40
2394DATA expandAVX512_56_mat2<>+0x00(SB)/8, $0x0101010101010101
2395DATA expandAVX512_56_mat2<>+0x08(SB)/8, $0x0202020202020202
2396DATA expandAVX512_56_mat2<>+0x10(SB)/8, $0x0404040404040404
2397DATA expandAVX512_56_mat2<>+0x18(SB)/8, $0x0000000000000000
2398DATA expandAVX512_56_mat2<>+0x20(SB)/8, $0x0000000000000000
2399DATA expandAVX512_56_mat2<>+0x28(SB)/8, $0x0000000000000000
2400DATA expandAVX512_56_mat2<>+0x30(SB)/8, $0x0000000000000000
2401DATA expandAVX512_56_mat2<>+0x38(SB)/8, $0x0000000000000000
2402
// expandAVX512_56_outShufLo is the VPERMI2B index vector that builds Z1 in
// expandAVX512_56. Indices 0x00-0x3f pick bytes of Z0 (first pass) and
// 0x40-0x7f pick bytes of Z3 (second pass).
// NOTE(review): declared without the <> suffix, so unlike the inShuf/mat
// tables it is not file-local; confirm that is what the generator intends.
2403GLOBL expandAVX512_56_outShufLo(SB), RODATA, $0x40
2404DATA expandAVX512_56_outShufLo+0x00(SB)/8, $0x0806050403020100
2405DATA expandAVX512_56_outShufLo+0x08(SB)/8, $0x11100e0d0c0b0a09
2406DATA expandAVX512_56_outShufLo+0x10(SB)/8, $0x1a19181615141312
2407DATA expandAVX512_56_outShufLo+0x18(SB)/8, $0x232221201e1d1c1b
2408DATA expandAVX512_56_outShufLo+0x20(SB)/8, $0x2c2b2a2928262524
2409DATA expandAVX512_56_outShufLo+0x28(SB)/8, $0x3534333231302e2d
2410DATA expandAVX512_56_outShufLo+0x30(SB)/8, $0x3e3d3c3b3a393836
2411DATA expandAVX512_56_outShufLo+0x38(SB)/8, $0x0f45444342414007
2412
// expandAVX512_56_outShufHi is the 64-byte index vector for the second
// VPERMI2B in expandAVX512_56 (loaded into Z2). Indices 0x00-0x3f pick bytes
// from the stage-1 result (Z3) and 0x40-0x7f from the stage-2 result (Z4).
// Note: unlike the inShuf/mat tables this symbol is declared without the <>
// suffix, so it is not file-local.
2413GLOBL expandAVX512_56_outShufHi(SB), RODATA, $0x40
2414DATA expandAVX512_56_outShufHi+0x00(SB)/8, $0x11100d0c0b0a0908
2415DATA expandAVX512_56_outShufHi+0x08(SB)/8, $0x1a19181615141312
2416DATA expandAVX512_56_outShufHi+0x10(SB)/8, $0x232221201e1d1c1b
2417DATA expandAVX512_56_outShufHi+0x18(SB)/8, $0x2c2b2a2928262524
2418DATA expandAVX512_56_outShufHi+0x20(SB)/8, $0x3534333231302e2d
2419DATA expandAVX512_56_outShufHi+0x28(SB)/8, $0x3e3d3c3b3a393836
2420DATA expandAVX512_56_outShufHi+0x30(SB)/8, $0x0e46454443424140
2421DATA expandAVX512_56_outShufHi+0x38(SB)/8, $0x50174c4b4a49480f
2422
// expandAVX512_56 loads 64 bytes from the address in AX and leaves two
// 64-byte vectors in Z1 and Z2 (the last registers it writes).
//
// It runs three gather+affine stages (VPERMB followed by VGF2P8AFFINEQB with
// immediate 0), producing intermediates in Z0, Z3 and Z4, then two VPERMI2B
// byte shuffles that each draw from an adjacent pair of intermediates.
// Stages 0 and 1 share the matrices of expandAVX512_56_mat0<>.
//
// Overwrites Z0-Z6. Declared NOSPLIT with a $0-0 frame (no locals, no stack
// arguments).
// NOTE(review): going by the name and the tables, this presumably replicates
// each input bit 56 times (7 output bytes per bit) with Z1 the low and Z2 the
// high half of the result — confirm against the generator and the caller.
2423TEXT expandAVX512_56<>(SB), NOSPLIT, $0-0
2424 VMOVDQU64 expandAVX512_56_inShuf0<>(SB), Z0 // Z0 = stage-0 byte indices
2425 VMOVDQU64 expandAVX512_56_mat0<>(SB), Z3 // Z3 = bit matrices shared by stages 0 and 1
2426 VMOVDQU64 expandAVX512_56_inShuf1<>(SB), Z4 // Z4 = stage-1 byte indices
2427 VMOVDQU64 expandAVX512_56_inShuf2<>(SB), Z5 // Z5 = stage-2 byte indices
2428 VMOVDQU64 expandAVX512_56_outShufLo(SB), Z1 // Z1 = indices for the first output shuffle
2429 VMOVDQU64 expandAVX512_56_outShufHi(SB), Z2 // Z2 = indices for the second output shuffle
2430 VMOVDQU64 (AX), Z6 // Z6 = 64 input bytes
2431 VPERMB Z6, Z0, Z0 // Z0 = input bytes gathered by the stage-0 indices
2432 VGF2P8AFFINEQB $0, Z3, Z0, Z0 // Z0 = stage-0 result: each byte transformed by its qword's matrix
2433 VPERMB Z6, Z4, Z4 // Z4 = input bytes gathered by the stage-1 indices
2434 VGF2P8AFFINEQB $0, Z3, Z4, Z3 // Z3 = stage-1 result; overwrites the matrices, which are no longer needed
2435 VPERMB Z6, Z5, Z4 // Z4 = input bytes gathered by the stage-2 indices
2436 VGF2P8AFFINEQB $0, expandAVX512_56_mat2<>(SB), Z4, Z4 // Z4 = stage-2 result, matrices read from memory
2437 VPERMI2B Z3, Z0, Z1 // Z1 = bytes chosen from Z0 (index 0x00-0x3f) and Z3 (index 0x40-0x7f)
2438 VPERMI2B Z4, Z3, Z2 // Z2 = bytes chosen from Z3 (index 0x00-0x3f) and Z4 (index 0x40-0x7f)
2439 RET
2440
// expandAVX512_60_inShuf0<> is a 64-byte read-only table of byte indices.
// expandAVX512_60 loads it into Z0 and uses it as the VPERMB index vector
// for its first gather from the input block (mostly input byte 0).
// VPERMB looks only at the low 6 bits of each index, so an 0xff entry reads
// input byte 63; presumably those lanes are never consumed later (TODO confirm).
2441GLOBL expandAVX512_60_inShuf0<>(SB), RODATA, $0x40
2442DATA expandAVX512_60_inShuf0<>+0x00(SB)/8, $0x0100000000000000
2443DATA expandAVX512_60_inShuf0<>+0x08(SB)/8, $0xffffffffffffff00
2444DATA expandAVX512_60_inShuf0<>+0x10(SB)/8, $0xff00000000000000
2445DATA expandAVX512_60_inShuf0<>+0x18(SB)/8, $0xff00000000000000
2446DATA expandAVX512_60_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
2447DATA expandAVX512_60_inShuf0<>+0x28(SB)/8, $0xff00000000000000
2448DATA expandAVX512_60_inShuf0<>+0x30(SB)/8, $0xff00000000000000
2449DATA expandAVX512_60_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
2450
// expandAVX512_60_mat0<> holds eight 8x8 bit matrices (one per qword) used as
// the memory operand of the first VGF2P8AFFINEQB in expandAVX512_60.
// Qwords whose rows are all the same single-bit mask turn each byte into
// 0xff/0x00 according to that one input bit. Qwords such as
// 0x0101010102020202 use two different row masks, so their output bytes are
// split half-and-half between two adjacent input bits.
2451GLOBL expandAVX512_60_mat0<>(SB), RODATA, $0x40
2452DATA expandAVX512_60_mat0<>+0x00(SB)/8, $0x0101010101010101
2453DATA expandAVX512_60_mat0<>+0x08(SB)/8, $0x0101010102020202
2454DATA expandAVX512_60_mat0<>+0x10(SB)/8, $0x0202020202020202
2455DATA expandAVX512_60_mat0<>+0x18(SB)/8, $0x0404040404040404
2456DATA expandAVX512_60_mat0<>+0x20(SB)/8, $0x0404040408080808
2457DATA expandAVX512_60_mat0<>+0x28(SB)/8, $0x0808080808080808
2458DATA expandAVX512_60_mat0<>+0x30(SB)/8, $0x1010101010101010
2459DATA expandAVX512_60_mat0<>+0x38(SB)/8, $0x1010101020202020
2460
// expandAVX512_60_inShuf1<> is a 64-byte read-only table of byte indices.
// expandAVX512_60 loads it into Z2 and uses it as the VPERMB index vector
// for its second gather from the input block (input bytes 0, 1 and 2).
// VPERMB looks only at the low 6 bits of each index, so an 0xff entry reads
// input byte 63; presumably those lanes are never consumed later (TODO confirm).
2461GLOBL expandAVX512_60_inShuf1<>(SB), RODATA, $0x40
2462DATA expandAVX512_60_inShuf1<>+0x00(SB)/8, $0xff00000000000000
2463DATA expandAVX512_60_inShuf1<>+0x08(SB)/8, $0xff00000000000000
2464DATA expandAVX512_60_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
2465DATA expandAVX512_60_inShuf1<>+0x18(SB)/8, $0xff00000000000000
2466DATA expandAVX512_60_inShuf1<>+0x20(SB)/8, $0xffffffffff010101
2467DATA expandAVX512_60_inShuf1<>+0x28(SB)/8, $0x0202020202010101
2468DATA expandAVX512_60_inShuf1<>+0x30(SB)/8, $0xffffffffffff0201
2469DATA expandAVX512_60_inShuf1<>+0x38(SB)/8, $0xff01010101010101
2470
// expandAVX512_60_mat1<> holds eight 8x8 bit matrices (one per qword) used as
// the memory operand of the second VGF2P8AFFINEQB in expandAVX512_60.
// The first four qwords select bits 5-7 of the gathered byte and the last
// four start again at bits 0-1. The two mixed qwords (0x4040404080808080,
// 0x0101010102020202) give bytes split between two adjacent bits.
2471GLOBL expandAVX512_60_mat1<>(SB), RODATA, $0x40
2472DATA expandAVX512_60_mat1<>+0x00(SB)/8, $0x2020202020202020
2473DATA expandAVX512_60_mat1<>+0x08(SB)/8, $0x4040404040404040
2474DATA expandAVX512_60_mat1<>+0x10(SB)/8, $0x4040404080808080
2475DATA expandAVX512_60_mat1<>+0x18(SB)/8, $0x8080808080808080
2476DATA expandAVX512_60_mat1<>+0x20(SB)/8, $0x0101010101010101
2477DATA expandAVX512_60_mat1<>+0x28(SB)/8, $0x0101010101010101
2478DATA expandAVX512_60_mat1<>+0x30(SB)/8, $0x0101010102020202
2479DATA expandAVX512_60_mat1<>+0x38(SB)/8, $0x0202020202020202
2480
// expandAVX512_60_inShuf2<> is a 64-byte read-only table of byte indices.
// expandAVX512_60 loads it into Z3 and uses it as the VPERMB index vector
// for its third gather from the input block (input byte 1 only).
// VPERMB looks only at the low 6 bits of each index, so an 0xff entry reads
// input byte 63; presumably those lanes are never consumed later (TODO confirm).
2481GLOBL expandAVX512_60_inShuf2<>(SB), RODATA, $0x40
2482DATA expandAVX512_60_inShuf2<>+0x00(SB)/8, $0xff01010101010101
2483DATA expandAVX512_60_inShuf2<>+0x08(SB)/8, $0xffffffffffffff01
2484DATA expandAVX512_60_inShuf2<>+0x10(SB)/8, $0xff01010101010101
2485DATA expandAVX512_60_inShuf2<>+0x18(SB)/8, $0xff01010101010101
2486DATA expandAVX512_60_inShuf2<>+0x20(SB)/8, $0xffffffffffffff01
2487DATA expandAVX512_60_inShuf2<>+0x28(SB)/8, $0xff01010101010101
2488DATA expandAVX512_60_inShuf2<>+0x30(SB)/8, $0xff01010101010101
2489DATA expandAVX512_60_inShuf2<>+0x38(SB)/8, $0xffffffffffffff01
2490
// expandAVX512_60_mat2<> holds eight 8x8 bit matrices (one per qword) used as
// the memory operand of the third VGF2P8AFFINEQB in expandAVX512_60.
// The row masks walk through bits 2-7 of the gathered byte. Qwords with two
// different row masks (e.g. 0x0404040408080808) give bytes split between two
// adjacent bits.
2491GLOBL expandAVX512_60_mat2<>(SB), RODATA, $0x40
2492DATA expandAVX512_60_mat2<>+0x00(SB)/8, $0x0404040404040404
2493DATA expandAVX512_60_mat2<>+0x08(SB)/8, $0x0404040408080808
2494DATA expandAVX512_60_mat2<>+0x10(SB)/8, $0x0808080808080808
2495DATA expandAVX512_60_mat2<>+0x18(SB)/8, $0x1010101010101010
2496DATA expandAVX512_60_mat2<>+0x20(SB)/8, $0x1010101020202020
2497DATA expandAVX512_60_mat2<>+0x28(SB)/8, $0x2020202020202020
2498DATA expandAVX512_60_mat2<>+0x30(SB)/8, $0x4040404040404040
2499DATA expandAVX512_60_mat2<>+0x38(SB)/8, $0x4040404080808080
2500
// expandAVX512_60_inShuf3<> is a 64-byte read-only table of byte indices.
// expandAVX512_60 loads it into Z4 and uses it as the VPERMB index vector
// for its fourth gather from the input block. Only the first ten lanes name
// real input bytes (1 and 2); the remaining lanes hold 0xff and are either
// zeroed by expandAVX512_60_mat3<> or presumably unused (TODO confirm).
2501GLOBL expandAVX512_60_inShuf3<>(SB), RODATA, $0x40
2502DATA expandAVX512_60_inShuf3<>+0x00(SB)/8, $0xff01010101010101
2503DATA expandAVX512_60_inShuf3<>+0x08(SB)/8, $0xffffffffffff0202
2504DATA expandAVX512_60_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
2505DATA expandAVX512_60_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
2506DATA expandAVX512_60_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
2507DATA expandAVX512_60_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
2508DATA expandAVX512_60_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
2509DATA expandAVX512_60_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
2510
// expandAVX512_60_mat3<> holds eight 8x8 bit matrices (one per qword) used as
// the memory operand of the fourth VGF2P8AFFINEQB in expandAVX512_60.
// Qword 0 selects bit 7 and qword 1 selects bit 0 of the gathered byte.
// Qwords 2-7 are zero matrices, which together with the immediate 0 force
// those output lanes to zero.
2511GLOBL expandAVX512_60_mat3<>(SB), RODATA, $0x40
2512DATA expandAVX512_60_mat3<>+0x00(SB)/8, $0x8080808080808080
2513DATA expandAVX512_60_mat3<>+0x08(SB)/8, $0x0101010101010101
2514DATA expandAVX512_60_mat3<>+0x10(SB)/8, $0x0000000000000000
2515DATA expandAVX512_60_mat3<>+0x18(SB)/8, $0x0000000000000000
2516DATA expandAVX512_60_mat3<>+0x20(SB)/8, $0x0000000000000000
2517DATA expandAVX512_60_mat3<>+0x28(SB)/8, $0x0000000000000000
2518DATA expandAVX512_60_mat3<>+0x30(SB)/8, $0x0000000000000000
2519DATA expandAVX512_60_mat3<>+0x38(SB)/8, $0x0000000000000000
2520
// expandAVX512_60_outShufLo is the 64-byte index vector for the unmasked
// VPERMI2B in expandAVX512_60 (loaded into Z1). Indices 0x00-0x3f pick bytes
// from the stage-0 result (Z0) and 0x40-0x7f from the stage-1 result (Z2).
// Note: unlike the inShuf/mat tables this symbol is declared without the <>
// suffix, so it is not file-local.
2521GLOBL expandAVX512_60_outShufLo(SB), RODATA, $0x40
2522DATA expandAVX512_60_outShufLo+0x00(SB)/8, $0x0806050403020100
2523DATA expandAVX512_60_outShufLo+0x08(SB)/8, $0x1816151413121110
2524DATA expandAVX512_60_outShufLo+0x10(SB)/8, $0x28201e1d1c1b1a19
2525DATA expandAVX512_60_outShufLo+0x18(SB)/8, $0x31302e2d2c2b2a29
2526DATA expandAVX512_60_outShufLo+0x20(SB)/8, $0x4140383635343332
2527DATA expandAVX512_60_outShufLo+0x28(SB)/8, $0x4a49484645444342
2528DATA expandAVX512_60_outShufLo+0x30(SB)/8, $0x5a5958504e4d4c4b
2529DATA expandAVX512_60_outShufLo+0x38(SB)/8, $0x626160075e5d5c5b
2530
// expandAVX512_60_outShufHi0 is the 64-byte index vector for the zero-masked
// VPERMI2B in expandAVX512_60 (loaded into Z5). Indices 0x00-0x3f pick bytes
// from the stage-1 result (Z2) and 0x40-0x7f from the stage-2 result (Z3).
// The 0xff entries (lanes 49-55, 61 and 62) are exactly the lanes cleared in
// the mask 0x9f01ffffffffffff, so they are zeroed and later filled from
// expandAVX512_60_outShufHi1.
// Note: unlike the inShuf/mat tables this symbol is declared without the <>
// suffix, so it is not file-local.
2531GLOBL expandAVX512_60_outShufHi0(SB), RODATA, $0x40
2532DATA expandAVX512_60_outShufHi0+0x00(SB)/8, $0x3b3a3938302a2928
2533DATA expandAVX512_60_outShufHi0+0x08(SB)/8, $0x44434241403e3d3c
2534DATA expandAVX512_60_outShufHi0+0x10(SB)/8, $0x5453525150484645
2535DATA expandAVX512_60_outShufHi0+0x18(SB)/8, $0x5d5c5b5a59585655
2536DATA expandAVX512_60_outShufHi0+0x20(SB)/8, $0x6d6c6b6a6968605e
2537DATA expandAVX512_60_outShufHi0+0x28(SB)/8, $0x767574737271706e
2538DATA expandAVX512_60_outShufHi0+0x30(SB)/8, $0xffffffffffffff78
2539DATA expandAVX512_60_outShufHi0+0x38(SB)/8, $0x31ffff2f2e2d2c2b
2540
// expandAVX512_60_outShufHi1 is the 64-byte index vector for the zero-masked
// VPERMB in expandAVX512_60 (loaded into Z6); it picks bytes from the stage-3
// result (Z4). Only lanes 49-55, 61 and 62 hold real indices, matching the
// bits set in the mask 0x60fe000000000000; every other lane is 0xff and is
// zeroed by the mask.
// Note: unlike the inShuf/mat tables this symbol is declared without the <>
// suffix, so it is not file-local.
2541GLOBL expandAVX512_60_outShufHi1(SB), RODATA, $0x40
2542DATA expandAVX512_60_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
2543DATA expandAVX512_60_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
2544DATA expandAVX512_60_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
2545DATA expandAVX512_60_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
2546DATA expandAVX512_60_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
2547DATA expandAVX512_60_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
2548DATA expandAVX512_60_outShufHi1+0x30(SB)/8, $0x06050403020100ff
2549DATA expandAVX512_60_outShufHi1+0x38(SB)/8, $0xff0908ffffffffff
2550
// expandAVX512_60 loads 64 bytes from the address in AX and leaves two
// 64-byte vectors in Z1 and Z2 (the last vector registers it writes).
//
// It runs four gather+affine stages (VPERMB followed by VGF2P8AFFINEQB with
// immediate 0), producing intermediates in Z0, Z2, Z3 and Z4. Z1 is a single
// VPERMI2B over Z0/Z2. Z2 needs bytes from three intermediates, so it is
// built from a zero-masked VPERMI2B over Z2/Z3 and a zero-masked VPERMB over
// Z4, using complementary lane masks, and the two halves are ORed together.
//
// Overwrites Z0-Z7, K1 and AX (AX is reused as scratch for the mask constants
// after the input has been loaded). Declared NOSPLIT with a $0-0 frame (no
// locals, no stack arguments).
// NOTE(review): going by the name and the tables, this presumably replicates
// each input bit 60 times (7.5 output bytes per bit) with Z1 the low and Z2
// the high half of the result — confirm against the generator and the caller.
2551TEXT expandAVX512_60<>(SB), NOSPLIT, $0-0
2552 VMOVDQU64 expandAVX512_60_inShuf0<>(SB), Z0 // Z0 = stage-0 byte indices
2553 VMOVDQU64 expandAVX512_60_inShuf1<>(SB), Z2 // Z2 = stage-1 byte indices
2554 VMOVDQU64 expandAVX512_60_inShuf2<>(SB), Z3 // Z3 = stage-2 byte indices
2555 VMOVDQU64 expandAVX512_60_inShuf3<>(SB), Z4 // Z4 = stage-3 byte indices
2556 VMOVDQU64 expandAVX512_60_outShufLo(SB), Z1 // Z1 = indices for the first output shuffle
2557 VMOVDQU64 expandAVX512_60_outShufHi0(SB), Z5 // Z5 = indices for the masked two-table shuffle
2558 VMOVDQU64 expandAVX512_60_outShufHi1(SB), Z6 // Z6 = indices for the masked one-table shuffle
2559 VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes; AX is not needed as a pointer after this
2560 VPERMB Z7, Z0, Z0 // Z0 = input bytes gathered by the stage-0 indices
2561 VGF2P8AFFINEQB $0, expandAVX512_60_mat0<>(SB), Z0, Z0 // Z0 = stage-0 result
2562 VPERMB Z7, Z2, Z2 // Z2 = input bytes gathered by the stage-1 indices
2563 VGF2P8AFFINEQB $0, expandAVX512_60_mat1<>(SB), Z2, Z2 // Z2 = stage-1 result
2564 VPERMB Z7, Z3, Z3 // Z3 = input bytes gathered by the stage-2 indices
2565 VGF2P8AFFINEQB $0, expandAVX512_60_mat2<>(SB), Z3, Z3 // Z3 = stage-2 result
2566 VPERMB Z7, Z4, Z4 // Z4 = input bytes gathered by the stage-3 indices
2567 VGF2P8AFFINEQB $0, expandAVX512_60_mat3<>(SB), Z4, Z4 // Z4 = stage-3 result
2568 VPERMI2B Z2, Z0, Z1 // Z1 = bytes chosen from Z0 (index 0x00-0x3f) and Z2 (index 0x40-0x7f)
2569 MOVQ $0x9f01ffffffffffff, AX // lanes 0-48, 56-60 and 63 come from Z2/Z3
2570 KMOVQ AX, K1
2571 VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = masked pick from Z2 (0x00-0x3f) / Z3 (0x40-0x7f); other lanes zeroed
2572 MOVQ $0x60fe000000000000, AX // complementary mask: lanes 49-55, 61 and 62 come from Z4
2573 KMOVQ AX, K1
2574 VPERMB.Z Z4, Z6, K1, Z0 // Z0 = masked pick from Z4; other lanes zeroed
2575 VPORQ Z0, Z5, Z2 // Z2 = merge of the two disjoint partial results
2576 RET
2577
// expandAVX512_64_inShuf0<> is a 64-byte read-only table of byte indices.
// expandAVX512_64 loads it into Z0 and uses it as the VPERMB index vector
// for its first gather; every entry is 0, so input byte 0 is broadcast to
// all 64 lanes.
2578GLOBL expandAVX512_64_inShuf0<>(SB), RODATA, $0x40
2579DATA expandAVX512_64_inShuf0<>+0x00(SB)/8, $0x0000000000000000
2580DATA expandAVX512_64_inShuf0<>+0x08(SB)/8, $0x0000000000000000
2581DATA expandAVX512_64_inShuf0<>+0x10(SB)/8, $0x0000000000000000
2582DATA expandAVX512_64_inShuf0<>+0x18(SB)/8, $0x0000000000000000
2583DATA expandAVX512_64_inShuf0<>+0x20(SB)/8, $0x0000000000000000
2584DATA expandAVX512_64_inShuf0<>+0x28(SB)/8, $0x0000000000000000
2585DATA expandAVX512_64_inShuf0<>+0x30(SB)/8, $0x0000000000000000
2586DATA expandAVX512_64_inShuf0<>+0x38(SB)/8, $0x0000000000000000
2587
// expandAVX512_64_mat0<> holds eight 8x8 bit matrices (one per qword) that
// expandAVX512_64 loads into Z1 and uses for both of its VGF2P8AFFINEQB
// steps. In qword k every row is the mask 1<<k, so the eight bytes of qword
// k each become 0xff or 0x00 according to bit k of the gathered byte.
2588GLOBL expandAVX512_64_mat0<>(SB), RODATA, $0x40
2589DATA expandAVX512_64_mat0<>+0x00(SB)/8, $0x0101010101010101
2590DATA expandAVX512_64_mat0<>+0x08(SB)/8, $0x0202020202020202
2591DATA expandAVX512_64_mat0<>+0x10(SB)/8, $0x0404040404040404
2592DATA expandAVX512_64_mat0<>+0x18(SB)/8, $0x0808080808080808
2593DATA expandAVX512_64_mat0<>+0x20(SB)/8, $0x1010101010101010
2594DATA expandAVX512_64_mat0<>+0x28(SB)/8, $0x2020202020202020
2595DATA expandAVX512_64_mat0<>+0x30(SB)/8, $0x4040404040404040
2596DATA expandAVX512_64_mat0<>+0x38(SB)/8, $0x8080808080808080
2597
// expandAVX512_64_inShuf1<> is a 64-byte read-only table of byte indices.
// expandAVX512_64 loads it into Z2 and uses it as the VPERMB index vector
// for its second gather; every entry is 1, so input byte 1 is broadcast to
// all 64 lanes.
2598GLOBL expandAVX512_64_inShuf1<>(SB), RODATA, $0x40
2599DATA expandAVX512_64_inShuf1<>+0x00(SB)/8, $0x0101010101010101
2600DATA expandAVX512_64_inShuf1<>+0x08(SB)/8, $0x0101010101010101
2601DATA expandAVX512_64_inShuf1<>+0x10(SB)/8, $0x0101010101010101
2602DATA expandAVX512_64_inShuf1<>+0x18(SB)/8, $0x0101010101010101
2603DATA expandAVX512_64_inShuf1<>+0x20(SB)/8, $0x0101010101010101
2604DATA expandAVX512_64_inShuf1<>+0x28(SB)/8, $0x0101010101010101
2605DATA expandAVX512_64_inShuf1<>+0x30(SB)/8, $0x0101010101010101
2606DATA expandAVX512_64_inShuf1<>+0x38(SB)/8, $0x0101010101010101
2607
// expandAVX512_64_outShufLo is the 64-byte index vector expandAVX512_64 loads
// into Z3 and uses for both output VPERMB shuffles. It lists 0x00..0x3f in
// order, i.e. the identity permutation, so those shuffles copy their source
// unchanged.
// Note: unlike the inShuf/mat tables this symbol is declared without the <>
// suffix, so it is not file-local.
2608GLOBL expandAVX512_64_outShufLo(SB), RODATA, $0x40
2609DATA expandAVX512_64_outShufLo+0x00(SB)/8, $0x0706050403020100
2610DATA expandAVX512_64_outShufLo+0x08(SB)/8, $0x0f0e0d0c0b0a0908
2611DATA expandAVX512_64_outShufLo+0x10(SB)/8, $0x1716151413121110
2612DATA expandAVX512_64_outShufLo+0x18(SB)/8, $0x1f1e1d1c1b1a1918
2613DATA expandAVX512_64_outShufLo+0x20(SB)/8, $0x2726252423222120
2614DATA expandAVX512_64_outShufLo+0x28(SB)/8, $0x2f2e2d2c2b2a2928
2615DATA expandAVX512_64_outShufLo+0x30(SB)/8, $0x3736353433323130
2616DATA expandAVX512_64_outShufLo+0x38(SB)/8, $0x3f3e3d3c3b3a3938
2617
// expandAVX512_64 loads 64 bytes from the address in AX and leaves two
// 64-byte vectors in Z1 and Z2 (the last registers it writes).
//
// Input byte 0 is broadcast to every lane and run through the per-qword bit
// matrices, so qword k of the intermediate is all-ones or all-zeros according
// to bit k of that byte; the same is done for input byte 1. Each intermediate
// then goes through a VPERMB whose index table is the identity permutation,
// giving Z1 (from byte 0) and Z2 (from byte 1).
//
// Overwrites Z0-Z4. Declared NOSPLIT with a $0-0 frame (no locals, no stack
// arguments).
// NOTE(review): presumably Z1/Z2 are the low/high halves of the result read
// by the caller — confirm against the caller.
2618TEXT expandAVX512_64<>(SB), NOSPLIT, $0-0
2619 VMOVDQU64 expandAVX512_64_inShuf0<>(SB), Z0 // Z0 = all-zero indices (broadcast input byte 0)
2620 VMOVDQU64 expandAVX512_64_mat0<>(SB), Z1 // Z1 = bit matrices shared by both stages
2621 VMOVDQU64 expandAVX512_64_inShuf1<>(SB), Z2 // Z2 = all-one indices (broadcast input byte 1)
2622 VMOVDQU64 expandAVX512_64_outShufLo(SB), Z3 // Z3 = identity output indices, used twice
2623 VMOVDQU64 (AX), Z4 // Z4 = 64 input bytes
2624 VPERMB Z4, Z0, Z0 // Z0 = input byte 0 in every lane
2625 VGF2P8AFFINEQB $0, Z1, Z0, Z0 // Z0 = qword k is 0xff.. or 0x00.. per bit k of input byte 0
2626 VPERMB Z4, Z2, Z2 // Z2 = input byte 1 in every lane
2627 VGF2P8AFFINEQB $0, Z1, Z2, Z2 // Z2 = qword k is 0xff.. or 0x00.. per bit k of input byte 1
2628 VPERMB Z0, Z3, Z1 // Z1 = Z0 permuted by Z3; overwrites the matrices, which are no longer needed
2629 VPERMB Z2, Z3, Z2 // Z2 = Z2 permuted by Z3
2630 RET
2631
View as plain text