...
Run Format

Text file src/internal/runtime/gc/scan/expand_amd64.s

Documentation: internal/runtime/gc/scan

     1// Code generated by mkasm.go. DO NOT EDIT.
     2
     3#include "go_asm.h"
     4#include "textflag.h"
     5
     // gcExpandersAVX512 is a read-only (RODATA) table of 0x220 bytes: 68 slots
     // of 8 bytes each, with slot k at byte offset 8*k.
     // Slots 1-26 hold the addresses of the file-local routines expandAVX512_N
     // for N = 1, 2, 3, 4, then the even values 6..32, then the multiples of
     // four 36..64. Slot 0 and slots 27-67 are explicitly zero.
     // NOTE(review): presumably the slot number is a size-class index and a zero
     // slot means "no AVX-512 expander available" — confirm against the Go code
     // that indexes this table.
     6GLOBL ·gcExpandersAVX512(SB), RODATA, $0x220
     7DATA  ·gcExpandersAVX512+0x00(SB)/8, $0 // slot 0: empty
     8DATA  ·gcExpandersAVX512+0x08(SB)/8, $expandAVX512_1<>(SB) // slot 1
     9DATA  ·gcExpandersAVX512+0x10(SB)/8, $expandAVX512_2<>(SB)
    10DATA  ·gcExpandersAVX512+0x18(SB)/8, $expandAVX512_3<>(SB)
    11DATA  ·gcExpandersAVX512+0x20(SB)/8, $expandAVX512_4<>(SB)
    12DATA  ·gcExpandersAVX512+0x28(SB)/8, $expandAVX512_6<>(SB)
    13DATA  ·gcExpandersAVX512+0x30(SB)/8, $expandAVX512_8<>(SB)
    14DATA  ·gcExpandersAVX512+0x38(SB)/8, $expandAVX512_10<>(SB)
    15DATA  ·gcExpandersAVX512+0x40(SB)/8, $expandAVX512_12<>(SB)
    16DATA  ·gcExpandersAVX512+0x48(SB)/8, $expandAVX512_14<>(SB)
    17DATA  ·gcExpandersAVX512+0x50(SB)/8, $expandAVX512_16<>(SB)
    18DATA  ·gcExpandersAVX512+0x58(SB)/8, $expandAVX512_18<>(SB)
    19DATA  ·gcExpandersAVX512+0x60(SB)/8, $expandAVX512_20<>(SB)
    20DATA  ·gcExpandersAVX512+0x68(SB)/8, $expandAVX512_22<>(SB)
    21DATA  ·gcExpandersAVX512+0x70(SB)/8, $expandAVX512_24<>(SB)
    22DATA  ·gcExpandersAVX512+0x78(SB)/8, $expandAVX512_26<>(SB)
    23DATA  ·gcExpandersAVX512+0x80(SB)/8, $expandAVX512_28<>(SB)
    24DATA  ·gcExpandersAVX512+0x88(SB)/8, $expandAVX512_30<>(SB)
    25DATA  ·gcExpandersAVX512+0x90(SB)/8, $expandAVX512_32<>(SB)
    26DATA  ·gcExpandersAVX512+0x98(SB)/8, $expandAVX512_36<>(SB)
    27DATA  ·gcExpandersAVX512+0xa0(SB)/8, $expandAVX512_40<>(SB)
    28DATA  ·gcExpandersAVX512+0xa8(SB)/8, $expandAVX512_44<>(SB)
    29DATA  ·gcExpandersAVX512+0xb0(SB)/8, $expandAVX512_48<>(SB)
    30DATA  ·gcExpandersAVX512+0xb8(SB)/8, $expandAVX512_52<>(SB)
    31DATA  ·gcExpandersAVX512+0xc0(SB)/8, $expandAVX512_56<>(SB)
    32DATA  ·gcExpandersAVX512+0xc8(SB)/8, $expandAVX512_60<>(SB)
    33DATA  ·gcExpandersAVX512+0xd0(SB)/8, $expandAVX512_64<>(SB) // slot 26: last populated slot
    34DATA  ·gcExpandersAVX512+0xd8(SB)/8, $0 // slots 27-67: empty
    35DATA  ·gcExpandersAVX512+0xe0(SB)/8, $0
    36DATA  ·gcExpandersAVX512+0xe8(SB)/8, $0
    37DATA  ·gcExpandersAVX512+0xf0(SB)/8, $0
    38DATA  ·gcExpandersAVX512+0xf8(SB)/8, $0
    39DATA  ·gcExpandersAVX512+0x100(SB)/8, $0
    40DATA  ·gcExpandersAVX512+0x108(SB)/8, $0
    41DATA  ·gcExpandersAVX512+0x110(SB)/8, $0
    42DATA  ·gcExpandersAVX512+0x118(SB)/8, $0
    43DATA  ·gcExpandersAVX512+0x120(SB)/8, $0
    44DATA  ·gcExpandersAVX512+0x128(SB)/8, $0
    45DATA  ·gcExpandersAVX512+0x130(SB)/8, $0
    46DATA  ·gcExpandersAVX512+0x138(SB)/8, $0
    47DATA  ·gcExpandersAVX512+0x140(SB)/8, $0
    48DATA  ·gcExpandersAVX512+0x148(SB)/8, $0
    49DATA  ·gcExpandersAVX512+0x150(SB)/8, $0
    50DATA  ·gcExpandersAVX512+0x158(SB)/8, $0
    51DATA  ·gcExpandersAVX512+0x160(SB)/8, $0
    52DATA  ·gcExpandersAVX512+0x168(SB)/8, $0
    53DATA  ·gcExpandersAVX512+0x170(SB)/8, $0
    54DATA  ·gcExpandersAVX512+0x178(SB)/8, $0
    55DATA  ·gcExpandersAVX512+0x180(SB)/8, $0
    56DATA  ·gcExpandersAVX512+0x188(SB)/8, $0
    57DATA  ·gcExpandersAVX512+0x190(SB)/8, $0
    58DATA  ·gcExpandersAVX512+0x198(SB)/8, $0
    59DATA  ·gcExpandersAVX512+0x1a0(SB)/8, $0
    60DATA  ·gcExpandersAVX512+0x1a8(SB)/8, $0
    61DATA  ·gcExpandersAVX512+0x1b0(SB)/8, $0
    62DATA  ·gcExpandersAVX512+0x1b8(SB)/8, $0
    63DATA  ·gcExpandersAVX512+0x1c0(SB)/8, $0
    64DATA  ·gcExpandersAVX512+0x1c8(SB)/8, $0
    65DATA  ·gcExpandersAVX512+0x1d0(SB)/8, $0
    66DATA  ·gcExpandersAVX512+0x1d8(SB)/8, $0
    67DATA  ·gcExpandersAVX512+0x1e0(SB)/8, $0
    68DATA  ·gcExpandersAVX512+0x1e8(SB)/8, $0
    69DATA  ·gcExpandersAVX512+0x1f0(SB)/8, $0
    70DATA  ·gcExpandersAVX512+0x1f8(SB)/8, $0
    71DATA  ·gcExpandersAVX512+0x200(SB)/8, $0
    72DATA  ·gcExpandersAVX512+0x208(SB)/8, $0
    73DATA  ·gcExpandersAVX512+0x210(SB)/8, $0
    74DATA  ·gcExpandersAVX512+0x218(SB)/8, $0
    75
    // expandAVX512_1 loads 128 bytes from the buffer addressed by AX into two
    // registers without transforming them: Z1 gets bytes 0-63 and Z2 gets
    // bytes 64-127.
    // NOSPLIT, $0-0: no stack-split check, zero-byte frame, no stack arguments.
    // In:  AX = address of the input buffer.
    // Out: Z1, Z2 as described above.
    // NOTE(review): Z1/Z2 look like the result registers shared by every
    // expander in this file — confirm against the caller.
    76TEXT expandAVX512_1<>(SB), NOSPLIT, $0-0
    77	VMOVDQU64 (AX), Z1 // Z1 = 64 bytes at AX+0
    78	VMOVDQU64 64(AX), Z2 // Z2 = 64 bytes at AX+64
    79	RET
    80
    // Constant tables for expandAVX512_2. Each table is 0x40 bytes (one ZMM
    // register) stored as eight little-endian qwords, so the lowest byte of each
    // DATA value is the lowest-addressed byte.
    //
    // inShuf0: VPERMB byte indices for the first stage. Every qword lists eight
    // consecutive input byte numbers and each such group is listed twice; the
    // table covers input bytes 0x00-0x1f.
    81GLOBL expandAVX512_2_inShuf0<>(SB), RODATA, $0x40
    82DATA  expandAVX512_2_inShuf0<>+0x00(SB)/8, $0x0706050403020100
    83DATA  expandAVX512_2_inShuf0<>+0x08(SB)/8, $0x0706050403020100
    84DATA  expandAVX512_2_inShuf0<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
    85DATA  expandAVX512_2_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
    86DATA  expandAVX512_2_inShuf0<>+0x20(SB)/8, $0x1716151413121110
    87DATA  expandAVX512_2_inShuf0<>+0x28(SB)/8, $0x1716151413121110
    88DATA  expandAVX512_2_inShuf0<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
    89DATA  expandAVX512_2_inShuf0<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
    90
    // mat0: eight 8x8 bit matrices for VGF2P8AFFINEQB, one per qword, one matrix
    // row per byte. Every row has exactly one bit set and each single-bit value
    // occurs twice; the two-qword pattern repeats four times.
    // NOTE(review): this looks like "copy each input bit to two output bits" —
    // confirm against mkasm.go.
    91GLOBL expandAVX512_2_mat0<>(SB), RODATA, $0x40
    92DATA  expandAVX512_2_mat0<>+0x00(SB)/8, $0x0101020204040808
    93DATA  expandAVX512_2_mat0<>+0x08(SB)/8, $0x1010202040408080
    94DATA  expandAVX512_2_mat0<>+0x10(SB)/8, $0x0101020204040808
    95DATA  expandAVX512_2_mat0<>+0x18(SB)/8, $0x1010202040408080
    96DATA  expandAVX512_2_mat0<>+0x20(SB)/8, $0x0101020204040808
    97DATA  expandAVX512_2_mat0<>+0x28(SB)/8, $0x1010202040408080
    98DATA  expandAVX512_2_mat0<>+0x30(SB)/8, $0x0101020204040808
    99DATA  expandAVX512_2_mat0<>+0x38(SB)/8, $0x1010202040408080
   100
    // inShuf1: same layout as inShuf0 but covering input bytes 0x20-0x3f.
   101GLOBL expandAVX512_2_inShuf1<>(SB), RODATA, $0x40
   102DATA  expandAVX512_2_inShuf1<>+0x00(SB)/8, $0x2726252423222120
   103DATA  expandAVX512_2_inShuf1<>+0x08(SB)/8, $0x2726252423222120
   104DATA  expandAVX512_2_inShuf1<>+0x10(SB)/8, $0x2f2e2d2c2b2a2928
   105DATA  expandAVX512_2_inShuf1<>+0x18(SB)/8, $0x2f2e2d2c2b2a2928
   106DATA  expandAVX512_2_inShuf1<>+0x20(SB)/8, $0x3736353433323130
   107DATA  expandAVX512_2_inShuf1<>+0x28(SB)/8, $0x3736353433323130
   108DATA  expandAVX512_2_inShuf1<>+0x30(SB)/8, $0x3f3e3d3c3b3a3938
   109DATA  expandAVX512_2_inShuf1<>+0x38(SB)/8, $0x3f3e3d3c3b3a3938
   110
    // outShufLo: VPERMB byte indices for the final reorder. Within each pair of
    // adjacent qwords it interleaves the bytes: 00,08,01,09,02,0a,...
    // NOTE(review): unlike the other tables this symbol has no `<>` file-local
    // suffix — confirm that is what the generator intends.
   111GLOBL expandAVX512_2_outShufLo(SB), RODATA, $0x40
   112DATA  expandAVX512_2_outShufLo+0x00(SB)/8, $0x0b030a0209010800
   113DATA  expandAVX512_2_outShufLo+0x08(SB)/8, $0x0f070e060d050c04
   114DATA  expandAVX512_2_outShufLo+0x10(SB)/8, $0x1b131a1219111810
   115DATA  expandAVX512_2_outShufLo+0x18(SB)/8, $0x1f171e161d151c14
   116DATA  expandAVX512_2_outShufLo+0x20(SB)/8, $0x2b232a2229212820
   117DATA  expandAVX512_2_outShufLo+0x28(SB)/8, $0x2f272e262d252c24
   118DATA  expandAVX512_2_outShufLo+0x30(SB)/8, $0x3b333a3239313830
   119DATA  expandAVX512_2_outShufLo+0x38(SB)/8, $0x3f373e363d353c34
   120
   // expandAVX512_2 reads 64 bytes at (AX) and produces two 64-byte results in
   // Z1 and Z2 using a gather (VPERMB), a per-byte bit-matrix transform
   // (VGF2P8AFFINEQB) and a final byte reorder (VPERMB).
   // Go assembler operand order is the reverse of Intel's: destination last.
   // NOSPLIT, $0-0: no stack-split check, zero-byte frame, no stack arguments.
   // In:       AX = address of the input buffer.
   // Out:      Z1 = result built from input bytes 0x00-0x1f (via inShuf0),
   //           Z2 = result built from input bytes 0x20-0x3f (via inShuf1).
   // Clobbers: Z0, Z3, Z4.
   // NOTE(review): presumably every input bit ends up duplicated in the output
   // (a 2x bitmap expansion) — confirm against the generator/caller.
   121TEXT expandAVX512_2<>(SB), NOSPLIT, $0-0
   122	VMOVDQU64 expandAVX512_2_inShuf0<>(SB), Z0 // Z0 = gather indices, stage 0
   123	VMOVDQU64 expandAVX512_2_mat0<>(SB), Z1 // Z1 = bit matrices, shared by both stages
   124	VMOVDQU64 expandAVX512_2_inShuf1<>(SB), Z2 // Z2 = gather indices, stage 1
   125	VMOVDQU64 expandAVX512_2_outShufLo(SB), Z3 // Z3 = output byte order, shared by both stages
   126	VMOVDQU64 (AX), Z4 // Z4 = 64 input bytes
   127	VPERMB Z4, Z0, Z0 // Z0 = bytes of Z4 picked by the indices in Z0
   128	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // Z0 = each byte of Z0 times its qword's matrix in Z1, plus constant 0
   129	VPERMB Z4, Z2, Z2 // same gather for stage 1
   130	VGF2P8AFFINEQB $0, Z1, Z2, Z2 // same transform for stage 1
   131	VPERMB Z0, Z3, Z1 // Z1 = Z0 reordered by Z3; the matrices in Z1 are no longer needed
   132	VPERMB Z2, Z3, Z2 // Z2 = Z2 reordered by Z3
   133	RET
   134
   // Constant tables for expandAVX512_3. Each table is 0x40 bytes (one ZMM
   // register) stored as eight little-endian qwords.
   //
   // inShuf0: VPERMB byte indices for stage 0. Input bytes 0x00-0x07 are listed
   // three times, then 0x08-0x0f three times; the last two qwords are all 0xff.
   // NOTE(review): 0xff appears to mark unused lanes. VPERMB only looks at the
   // low six index bits, so such a lane still reads a byte — confirm that these
   // lanes are never consumed.
   135GLOBL expandAVX512_3_inShuf0<>(SB), RODATA, $0x40
   136DATA  expandAVX512_3_inShuf0<>+0x00(SB)/8, $0x0706050403020100
   137DATA  expandAVX512_3_inShuf0<>+0x08(SB)/8, $0x0706050403020100
   138DATA  expandAVX512_3_inShuf0<>+0x10(SB)/8, $0x0706050403020100
   139DATA  expandAVX512_3_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
   140DATA  expandAVX512_3_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
   141DATA  expandAVX512_3_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
   142DATA  expandAVX512_3_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
   143DATA  expandAVX512_3_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
   144
   // mat0: 8x8 bit matrices for VGF2P8AFFINEQB, one per qword, one row per
   // byte. A three-qword pattern (each single-bit row value occurring three
   // times) is stored twice; the last two qwords are zero matrices.
   145GLOBL expandAVX512_3_mat0<>(SB), RODATA, $0x40
   146DATA  expandAVX512_3_mat0<>+0x00(SB)/8, $0x0101010202020404
   147DATA  expandAVX512_3_mat0<>+0x08(SB)/8, $0x0408080810101020
   148DATA  expandAVX512_3_mat0<>+0x10(SB)/8, $0x2020404040808080
   149DATA  expandAVX512_3_mat0<>+0x18(SB)/8, $0x0101010202020404
   150DATA  expandAVX512_3_mat0<>+0x20(SB)/8, $0x0408080810101020
   151DATA  expandAVX512_3_mat0<>+0x28(SB)/8, $0x2020404040808080
   152DATA  expandAVX512_3_mat0<>+0x30(SB)/8, $0x0000000000000000
   153DATA  expandAVX512_3_mat0<>+0x38(SB)/8, $0x0000000000000000
   154
   // inShuf1: same layout as inShuf0, covering input bytes 0x10-0x1f.
   155GLOBL expandAVX512_3_inShuf1<>(SB), RODATA, $0x40
   156DATA  expandAVX512_3_inShuf1<>+0x00(SB)/8, $0x1716151413121110
   157DATA  expandAVX512_3_inShuf1<>+0x08(SB)/8, $0x1716151413121110
   158DATA  expandAVX512_3_inShuf1<>+0x10(SB)/8, $0x1716151413121110
   159DATA  expandAVX512_3_inShuf1<>+0x18(SB)/8, $0x1f1e1d1c1b1a1918
   160DATA  expandAVX512_3_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
   161DATA  expandAVX512_3_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
   162DATA  expandAVX512_3_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
   163DATA  expandAVX512_3_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
   164
   // inShuf2: stage 2 indices. Input bytes 0x20-0x27 three times, then partial
   // groups that only name bytes 0x28-0x2a (or 0x28-0x29) with 0xff elsewhere.
   165GLOBL expandAVX512_3_inShuf2<>(SB), RODATA, $0x40
   166DATA  expandAVX512_3_inShuf2<>+0x00(SB)/8, $0x2726252423222120
   167DATA  expandAVX512_3_inShuf2<>+0x08(SB)/8, $0x2726252423222120
   168DATA  expandAVX512_3_inShuf2<>+0x10(SB)/8, $0x2726252423222120
   169DATA  expandAVX512_3_inShuf2<>+0x18(SB)/8, $0xffffffffff2a2928
   170DATA  expandAVX512_3_inShuf2<>+0x20(SB)/8, $0xffffffffff2a2928
   171DATA  expandAVX512_3_inShuf2<>+0x28(SB)/8, $0xffffffffffff2928
   172DATA  expandAVX512_3_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
   173DATA  expandAVX512_3_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
   174
   // outShufLo / outShufHi: VPERMI2B byte indices for the two output
   // registers. Indices 0x00-0x3f select from the first table register and
   // 0x40-0x7f from the second, which is why values above 0x3f appear here.
   // NOTE(review): neither symbol carries the `<>` file-local suffix used by
   // the other tables — confirm that is what the generator intends.
   175GLOBL expandAVX512_3_outShufLo(SB), RODATA, $0x40
   176DATA  expandAVX512_3_outShufLo+0x00(SB)/8, $0x0a02110901100800
   177DATA  expandAVX512_3_outShufLo+0x08(SB)/8, $0x05140c04130b0312
   178DATA  expandAVX512_3_outShufLo+0x10(SB)/8, $0x170f07160e06150d
   179DATA  expandAVX512_3_outShufLo+0x18(SB)/8, $0x221a292119282018
   180DATA  expandAVX512_3_outShufLo+0x20(SB)/8, $0x1d2c241c2b231b2a
   181DATA  expandAVX512_3_outShufLo+0x28(SB)/8, $0x2f271f2e261e2d25
   182DATA  expandAVX512_3_outShufLo+0x30(SB)/8, $0x4a42514941504840
   183DATA  expandAVX512_3_outShufLo+0x38(SB)/8, $0x45544c44534b4352
   184
   185GLOBL expandAVX512_3_outShufHi(SB), RODATA, $0x40
   186DATA  expandAVX512_3_outShufHi+0x00(SB)/8, $0x170f07160e06150d
   187DATA  expandAVX512_3_outShufHi+0x08(SB)/8, $0x221a292119282018
   188DATA  expandAVX512_3_outShufHi+0x10(SB)/8, $0x1d2c241c2b231b2a
   189DATA  expandAVX512_3_outShufHi+0x18(SB)/8, $0x2f271f2e261e2d25
   190DATA  expandAVX512_3_outShufHi+0x20(SB)/8, $0x4a42514941504840
   191DATA  expandAVX512_3_outShufHi+0x28(SB)/8, $0x45544c44534b4352
   192DATA  expandAVX512_3_outShufHi+0x30(SB)/8, $0x574f47564e46554d
   193DATA  expandAVX512_3_outShufHi+0x38(SB)/8, $0x625a696159686058
   194
   // expandAVX512_3 reads 64 bytes at (AX), runs three gather + bit-matrix
   // stages over them, and merges the three stage results into two 64-byte
   // outputs with two-table byte permutes (VPERMI2B).
   // Go assembler operand order is the reverse of Intel's: destination last.
   // NOSPLIT, $0-0: no stack-split check, zero-byte frame, no stack arguments.
   // In:       AX = address of the input buffer.
   // Out:      Z1 = bytes drawn from stage 0 and stage 1,
   //           Z2 = bytes drawn from stage 1 and stage 2.
   // Clobbers: Z0, Z3, Z4, Z5, Z6.
   // NOTE(review): presumably a 3x bitmap expansion — confirm against the
   // generator/caller.
   195TEXT expandAVX512_3<>(SB), NOSPLIT, $0-0
   196	VMOVDQU64 expandAVX512_3_inShuf0<>(SB), Z0 // Z0 = gather indices, stage 0
   197	VMOVDQU64 expandAVX512_3_mat0<>(SB), Z3 // Z3 = bit matrices, shared by all three stages
   198	VMOVDQU64 expandAVX512_3_inShuf1<>(SB), Z4 // Z4 = gather indices, stage 1
   199	VMOVDQU64 expandAVX512_3_inShuf2<>(SB), Z5 // Z5 = gather indices, stage 2
   200	VMOVDQU64 expandAVX512_3_outShufLo(SB), Z1 // Z1 = merge indices for the first output
   201	VMOVDQU64 expandAVX512_3_outShufHi(SB), Z2 // Z2 = merge indices for the second output
   202	VMOVDQU64 (AX), Z6 // Z6 = 64 input bytes
   203	VPERMB Z6, Z0, Z0 // stage 0: gather input bytes
   204	VGF2P8AFFINEQB $0, Z3, Z0, Z0 // stage 0: apply bit matrices, constant 0
   205	VPERMB Z6, Z4, Z4 // stage 1: gather
   206	VGF2P8AFFINEQB $0, Z3, Z4, Z4 // stage 1: transform
   207	VPERMB Z6, Z5, Z5 // stage 2: gather
   208	VGF2P8AFFINEQB $0, Z3, Z5, Z3 // stage 2: transform; result replaces the matrices in Z3 (last use)
   209	VPERMI2B Z4, Z0, Z1 // Z1 = bytes chosen by Z1 from Z0 (idx 0x00-0x3f) and Z4 (idx 0x40-0x7f)
   210	VPERMI2B Z3, Z4, Z2 // Z2 = bytes chosen by Z2 from Z4 (idx 0x00-0x3f) and Z3 (idx 0x40-0x7f)
   211	RET
   212
   // Constant tables for expandAVX512_4. Each table is 0x40 bytes (one ZMM
   // register) stored as eight little-endian qwords.
   //
   // inShuf0: VPERMB byte indices for stage 0. Input bytes 0x00-0x07 are listed
   // four times, then 0x08-0x0f four times.
   213GLOBL expandAVX512_4_inShuf0<>(SB), RODATA, $0x40
   214DATA  expandAVX512_4_inShuf0<>+0x00(SB)/8, $0x0706050403020100
   215DATA  expandAVX512_4_inShuf0<>+0x08(SB)/8, $0x0706050403020100
   216DATA  expandAVX512_4_inShuf0<>+0x10(SB)/8, $0x0706050403020100
   217DATA  expandAVX512_4_inShuf0<>+0x18(SB)/8, $0x0706050403020100
   218DATA  expandAVX512_4_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
   219DATA  expandAVX512_4_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
   220DATA  expandAVX512_4_inShuf0<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
   221DATA  expandAVX512_4_inShuf0<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
   222
   // mat0: 8x8 bit matrices for VGF2P8AFFINEQB, one per qword, one row per
   // byte. Every row has exactly one bit set and each single-bit value occurs
   // four times; the four-qword pattern is stored twice.
   223GLOBL expandAVX512_4_mat0<>(SB), RODATA, $0x40
   224DATA  expandAVX512_4_mat0<>+0x00(SB)/8, $0x0101010102020202
   225DATA  expandAVX512_4_mat0<>+0x08(SB)/8, $0x0404040408080808
   226DATA  expandAVX512_4_mat0<>+0x10(SB)/8, $0x1010101020202020
   227DATA  expandAVX512_4_mat0<>+0x18(SB)/8, $0x4040404080808080
   228DATA  expandAVX512_4_mat0<>+0x20(SB)/8, $0x0101010102020202
   229DATA  expandAVX512_4_mat0<>+0x28(SB)/8, $0x0404040408080808
   230DATA  expandAVX512_4_mat0<>+0x30(SB)/8, $0x1010101020202020
   231DATA  expandAVX512_4_mat0<>+0x38(SB)/8, $0x4040404080808080
   232
   // inShuf1: same layout as inShuf0, covering input bytes 0x10-0x1f.
   233GLOBL expandAVX512_4_inShuf1<>(SB), RODATA, $0x40
   234DATA  expandAVX512_4_inShuf1<>+0x00(SB)/8, $0x1716151413121110
   235DATA  expandAVX512_4_inShuf1<>+0x08(SB)/8, $0x1716151413121110
   236DATA  expandAVX512_4_inShuf1<>+0x10(SB)/8, $0x1716151413121110
   237DATA  expandAVX512_4_inShuf1<>+0x18(SB)/8, $0x1716151413121110
   238DATA  expandAVX512_4_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
   239DATA  expandAVX512_4_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
   240DATA  expandAVX512_4_inShuf1<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
   241DATA  expandAVX512_4_inShuf1<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
   242
   // outShufLo: VPERMB byte indices for the final reorder. Within each group
   // of four adjacent qwords it interleaves the bytes: 00,08,10,18,01,09,...
   // NOTE(review): unlike the other tables this symbol has no `<>` file-local
   // suffix — confirm that is what the generator intends.
   243GLOBL expandAVX512_4_outShufLo(SB), RODATA, $0x40
   244DATA  expandAVX512_4_outShufLo+0x00(SB)/8, $0x1911090118100800
   245DATA  expandAVX512_4_outShufLo+0x08(SB)/8, $0x1b130b031a120a02
   246DATA  expandAVX512_4_outShufLo+0x10(SB)/8, $0x1d150d051c140c04
   247DATA  expandAVX512_4_outShufLo+0x18(SB)/8, $0x1f170f071e160e06
   248DATA  expandAVX512_4_outShufLo+0x20(SB)/8, $0x3931292138302820
   249DATA  expandAVX512_4_outShufLo+0x28(SB)/8, $0x3b332b233a322a22
   250DATA  expandAVX512_4_outShufLo+0x30(SB)/8, $0x3d352d253c342c24
   251DATA  expandAVX512_4_outShufLo+0x38(SB)/8, $0x3f372f273e362e26
   252
   // expandAVX512_4 reads 64 bytes at (AX) and produces two 64-byte results in
   // Z1 and Z2 using a gather (VPERMB), a per-byte bit-matrix transform
   // (VGF2P8AFFINEQB) and a final byte reorder (VPERMB).
   // Go assembler operand order is the reverse of Intel's: destination last.
   // NOSPLIT, $0-0: no stack-split check, zero-byte frame, no stack arguments.
   // In:       AX = address of the input buffer.
   // Out:      Z1 = result built from input bytes 0x00-0x0f (via inShuf0),
   //           Z2 = result built from input bytes 0x10-0x1f (via inShuf1).
   // Clobbers: Z0, Z3, Z4.
   // NOTE(review): presumably a 4x bitmap expansion — confirm against the
   // generator/caller.
   253TEXT expandAVX512_4<>(SB), NOSPLIT, $0-0
   254	VMOVDQU64 expandAVX512_4_inShuf0<>(SB), Z0 // Z0 = gather indices, stage 0
   255	VMOVDQU64 expandAVX512_4_mat0<>(SB), Z1 // Z1 = bit matrices, shared by both stages
   256	VMOVDQU64 expandAVX512_4_inShuf1<>(SB), Z2 // Z2 = gather indices, stage 1
   257	VMOVDQU64 expandAVX512_4_outShufLo(SB), Z3 // Z3 = output byte order, shared by both stages
   258	VMOVDQU64 (AX), Z4 // Z4 = 64 input bytes
   259	VPERMB Z4, Z0, Z0 // stage 0: gather input bytes
   260	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // stage 0: apply bit matrices, constant 0
   261	VPERMB Z4, Z2, Z2 // stage 1: gather
   262	VGF2P8AFFINEQB $0, Z1, Z2, Z2 // stage 1: transform
   263	VPERMB Z0, Z3, Z1 // Z1 = Z0 reordered by Z3; the matrices in Z1 are no longer needed
   264	VPERMB Z2, Z3, Z2 // Z2 = Z2 reordered by Z3
   265	RET
   266
   // Constant tables for expandAVX512_6. Each table is 0x40 bytes (one ZMM
   // register) stored as eight little-endian qwords.
   //
   // inShuf0: VPERMB byte indices for stage 0. Input bytes 0x00-0x07 are listed
   // six times; the last two qwords are all 0xff.
   // NOTE(review): 0xff appears to mark unused lanes. VPERMB only looks at the
   // low six index bits, so such a lane still reads a byte — confirm that these
   // lanes are never consumed.
   267GLOBL expandAVX512_6_inShuf0<>(SB), RODATA, $0x40
   268DATA  expandAVX512_6_inShuf0<>+0x00(SB)/8, $0x0706050403020100
   269DATA  expandAVX512_6_inShuf0<>+0x08(SB)/8, $0x0706050403020100
   270DATA  expandAVX512_6_inShuf0<>+0x10(SB)/8, $0x0706050403020100
   271DATA  expandAVX512_6_inShuf0<>+0x18(SB)/8, $0x0706050403020100
   272DATA  expandAVX512_6_inShuf0<>+0x20(SB)/8, $0x0706050403020100
   273DATA  expandAVX512_6_inShuf0<>+0x28(SB)/8, $0x0706050403020100
   274DATA  expandAVX512_6_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
   275DATA  expandAVX512_6_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
   276
   // mat0: 8x8 bit matrices for VGF2P8AFFINEQB, one per qword, one row per
   // byte. Across the first six qwords every row has exactly one bit set and
   // each single-bit value occurs six times; the last two qwords are zero.
   277GLOBL expandAVX512_6_mat0<>(SB), RODATA, $0x40
   278DATA  expandAVX512_6_mat0<>+0x00(SB)/8, $0x0101010101010202
   279DATA  expandAVX512_6_mat0<>+0x08(SB)/8, $0x0202020204040404
   280DATA  expandAVX512_6_mat0<>+0x10(SB)/8, $0x0404080808080808
   281DATA  expandAVX512_6_mat0<>+0x18(SB)/8, $0x1010101010102020
   282DATA  expandAVX512_6_mat0<>+0x20(SB)/8, $0x2020202040404040
   283DATA  expandAVX512_6_mat0<>+0x28(SB)/8, $0x4040808080808080
   284DATA  expandAVX512_6_mat0<>+0x30(SB)/8, $0x0000000000000000
   285DATA  expandAVX512_6_mat0<>+0x38(SB)/8, $0x0000000000000000
   286
   // inShuf1: same layout as inShuf0, covering input bytes 0x08-0x0f.
   287GLOBL expandAVX512_6_inShuf1<>(SB), RODATA, $0x40
   288DATA  expandAVX512_6_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
   289DATA  expandAVX512_6_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
   290DATA  expandAVX512_6_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
   291DATA  expandAVX512_6_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
   292DATA  expandAVX512_6_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
   293DATA  expandAVX512_6_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
   294DATA  expandAVX512_6_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
   295DATA  expandAVX512_6_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
   296
   // inShuf2: stage 2 indices. Partial groups naming input bytes 0x10-0x15
   // (first two qwords) or 0x10-0x14 (next four), 0xff elsewhere.
   297GLOBL expandAVX512_6_inShuf2<>(SB), RODATA, $0x40
   298DATA  expandAVX512_6_inShuf2<>+0x00(SB)/8, $0xffff151413121110
   299DATA  expandAVX512_6_inShuf2<>+0x08(SB)/8, $0xffff151413121110
   300DATA  expandAVX512_6_inShuf2<>+0x10(SB)/8, $0xffffff1413121110
   301DATA  expandAVX512_6_inShuf2<>+0x18(SB)/8, $0xffffff1413121110
   302DATA  expandAVX512_6_inShuf2<>+0x20(SB)/8, $0xffffff1413121110
   303DATA  expandAVX512_6_inShuf2<>+0x28(SB)/8, $0xffffff1413121110
   304DATA  expandAVX512_6_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
   305DATA  expandAVX512_6_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
   306
   // outShufLo / outShufHi: VPERMI2B byte indices for the two output
   // registers. Indices 0x00-0x3f select from the first table register and
   // 0x40-0x7f from the second, which is why values above 0x3f appear here.
   // NOTE(review): neither symbol carries the `<>` file-local suffix used by
   // the other tables — confirm that is what the generator intends.
   307GLOBL expandAVX512_6_outShufLo(SB), RODATA, $0x40
   308DATA  expandAVX512_6_outShufLo+0x00(SB)/8, $0x0901282018100800
   309DATA  expandAVX512_6_outShufLo+0x08(SB)/8, $0x1a120a0229211911
   310DATA  expandAVX512_6_outShufLo+0x10(SB)/8, $0x2b231b130b032a22
   311DATA  expandAVX512_6_outShufLo+0x18(SB)/8, $0x0d052c241c140c04
   312DATA  expandAVX512_6_outShufLo+0x20(SB)/8, $0x1e160e062d251d15
   313DATA  expandAVX512_6_outShufLo+0x28(SB)/8, $0x2f271f170f072e26
   314DATA  expandAVX512_6_outShufLo+0x30(SB)/8, $0x4941686058504840
   315DATA  expandAVX512_6_outShufLo+0x38(SB)/8, $0x5a524a4269615951
   316
   317GLOBL expandAVX512_6_outShufHi(SB), RODATA, $0x40
   318DATA  expandAVX512_6_outShufHi+0x00(SB)/8, $0x2b231b130b032a22
   319DATA  expandAVX512_6_outShufHi+0x08(SB)/8, $0x0d052c241c140c04
   320DATA  expandAVX512_6_outShufHi+0x10(SB)/8, $0x1e160e062d251d15
   321DATA  expandAVX512_6_outShufHi+0x18(SB)/8, $0x2f271f170f072e26
   322DATA  expandAVX512_6_outShufHi+0x20(SB)/8, $0x4941686058504840
   323DATA  expandAVX512_6_outShufHi+0x28(SB)/8, $0x5a524a4269615951
   324DATA  expandAVX512_6_outShufHi+0x30(SB)/8, $0x6b635b534b436a62
   325DATA  expandAVX512_6_outShufHi+0x38(SB)/8, $0x4d456c645c544c44
   326
   // expandAVX512_6 reads 64 bytes at (AX), runs three gather + bit-matrix
   // stages over them, and merges the three stage results into two 64-byte
   // outputs with two-table byte permutes (VPERMI2B).
   // Go assembler operand order is the reverse of Intel's: destination last.
   // NOSPLIT, $0-0: no stack-split check, zero-byte frame, no stack arguments.
   // In:       AX = address of the input buffer.
   // Out:      Z1 = bytes drawn from stage 0 and stage 1,
   //           Z2 = bytes drawn from stage 1 and stage 2.
   // Clobbers: Z0, Z3, Z4, Z5, Z6.
   // NOTE(review): presumably a 6x bitmap expansion — confirm against the
   // generator/caller.
   327TEXT expandAVX512_6<>(SB), NOSPLIT, $0-0
   328	VMOVDQU64 expandAVX512_6_inShuf0<>(SB), Z0 // Z0 = gather indices, stage 0
   329	VMOVDQU64 expandAVX512_6_mat0<>(SB), Z3 // Z3 = bit matrices, shared by all three stages
   330	VMOVDQU64 expandAVX512_6_inShuf1<>(SB), Z4 // Z4 = gather indices, stage 1
   331	VMOVDQU64 expandAVX512_6_inShuf2<>(SB), Z5 // Z5 = gather indices, stage 2
   332	VMOVDQU64 expandAVX512_6_outShufLo(SB), Z1 // Z1 = merge indices for the first output
   333	VMOVDQU64 expandAVX512_6_outShufHi(SB), Z2 // Z2 = merge indices for the second output
   334	VMOVDQU64 (AX), Z6 // Z6 = 64 input bytes
   335	VPERMB Z6, Z0, Z0 // stage 0: gather input bytes
   336	VGF2P8AFFINEQB $0, Z3, Z0, Z0 // stage 0: apply bit matrices, constant 0
   337	VPERMB Z6, Z4, Z4 // stage 1: gather
   338	VGF2P8AFFINEQB $0, Z3, Z4, Z4 // stage 1: transform
   339	VPERMB Z6, Z5, Z5 // stage 2: gather
   340	VGF2P8AFFINEQB $0, Z3, Z5, Z3 // stage 2: transform; result replaces the matrices in Z3 (last use)
   341	VPERMI2B Z4, Z0, Z1 // Z1 = bytes chosen by Z1 from Z0 (idx 0x00-0x3f) and Z4 (idx 0x40-0x7f)
   342	VPERMI2B Z3, Z4, Z2 // Z2 = bytes chosen by Z2 from Z4 (idx 0x00-0x3f) and Z3 (idx 0x40-0x7f)
   343	RET
   344
   // Constant tables for expandAVX512_8. Each table is 0x40 bytes (one ZMM
   // register) stored as eight little-endian qwords.
   //
   // inShuf0: VPERMB byte indices for stage 0. Input bytes 0x00-0x07 are listed
   // in all eight qwords.
   345GLOBL expandAVX512_8_inShuf0<>(SB), RODATA, $0x40
   346DATA  expandAVX512_8_inShuf0<>+0x00(SB)/8, $0x0706050403020100
   347DATA  expandAVX512_8_inShuf0<>+0x08(SB)/8, $0x0706050403020100
   348DATA  expandAVX512_8_inShuf0<>+0x10(SB)/8, $0x0706050403020100
   349DATA  expandAVX512_8_inShuf0<>+0x18(SB)/8, $0x0706050403020100
   350DATA  expandAVX512_8_inShuf0<>+0x20(SB)/8, $0x0706050403020100
   351DATA  expandAVX512_8_inShuf0<>+0x28(SB)/8, $0x0706050403020100
   352DATA  expandAVX512_8_inShuf0<>+0x30(SB)/8, $0x0706050403020100
   353DATA  expandAVX512_8_inShuf0<>+0x38(SB)/8, $0x0706050403020100
   354
   // mat0: 8x8 bit matrices for VGF2P8AFFINEQB, one per qword. All eight rows
   // of a matrix are the same single-bit value: 0x01 in qword 0 up to 0x80 in
   // qword 7.
   355GLOBL expandAVX512_8_mat0<>(SB), RODATA, $0x40
   356DATA  expandAVX512_8_mat0<>+0x00(SB)/8, $0x0101010101010101
   357DATA  expandAVX512_8_mat0<>+0x08(SB)/8, $0x0202020202020202
   358DATA  expandAVX512_8_mat0<>+0x10(SB)/8, $0x0404040404040404
   359DATA  expandAVX512_8_mat0<>+0x18(SB)/8, $0x0808080808080808
   360DATA  expandAVX512_8_mat0<>+0x20(SB)/8, $0x1010101010101010
   361DATA  expandAVX512_8_mat0<>+0x28(SB)/8, $0x2020202020202020
   362DATA  expandAVX512_8_mat0<>+0x30(SB)/8, $0x4040404040404040
   363DATA  expandAVX512_8_mat0<>+0x38(SB)/8, $0x8080808080808080
   364
   // inShuf1: same layout as inShuf0, covering input bytes 0x08-0x0f.
   365GLOBL expandAVX512_8_inShuf1<>(SB), RODATA, $0x40
   366DATA  expandAVX512_8_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
   367DATA  expandAVX512_8_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
   368DATA  expandAVX512_8_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
   369DATA  expandAVX512_8_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
   370DATA  expandAVX512_8_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
   371DATA  expandAVX512_8_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
   372DATA  expandAVX512_8_inShuf1<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
   373DATA  expandAVX512_8_inShuf1<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
   374
   // outShufLo: VPERMB byte indices for the final reorder. Output qword j
   // collects byte j of each of the eight source qwords (an 8x8 byte
   // transpose): 00,08,10,18,20,28,30,38, then 01,09,...
   // NOTE(review): unlike the other tables this symbol has no `<>` file-local
   // suffix — confirm that is what the generator intends.
   375GLOBL expandAVX512_8_outShufLo(SB), RODATA, $0x40
   376DATA  expandAVX512_8_outShufLo+0x00(SB)/8, $0x3830282018100800
   377DATA  expandAVX512_8_outShufLo+0x08(SB)/8, $0x3931292119110901
   378DATA  expandAVX512_8_outShufLo+0x10(SB)/8, $0x3a322a221a120a02
   379DATA  expandAVX512_8_outShufLo+0x18(SB)/8, $0x3b332b231b130b03
   380DATA  expandAVX512_8_outShufLo+0x20(SB)/8, $0x3c342c241c140c04
   381DATA  expandAVX512_8_outShufLo+0x28(SB)/8, $0x3d352d251d150d05
   382DATA  expandAVX512_8_outShufLo+0x30(SB)/8, $0x3e362e261e160e06
   383DATA  expandAVX512_8_outShufLo+0x38(SB)/8, $0x3f372f271f170f07
   384
   // expandAVX512_8 reads 64 bytes at (AX) and produces two 64-byte results in
   // Z1 and Z2 using a gather (VPERMB), a per-byte bit-matrix transform
   // (VGF2P8AFFINEQB) and a final byte reorder (VPERMB).
   // Go assembler operand order is the reverse of Intel's: destination last.
   // NOSPLIT, $0-0: no stack-split check, zero-byte frame, no stack arguments.
   // In:       AX = address of the input buffer.
   // Out:      Z1 = result built from input bytes 0x00-0x07 (via inShuf0),
   //           Z2 = result built from input bytes 0x08-0x0f (via inShuf1).
   // Clobbers: Z0, Z3, Z4.
   // NOTE(review): presumably an 8x bitmap expansion — confirm against the
   // generator/caller.
   385TEXT expandAVX512_8<>(SB), NOSPLIT, $0-0
   386	VMOVDQU64 expandAVX512_8_inShuf0<>(SB), Z0 // Z0 = gather indices, stage 0
   387	VMOVDQU64 expandAVX512_8_mat0<>(SB), Z1 // Z1 = bit matrices, shared by both stages
   388	VMOVDQU64 expandAVX512_8_inShuf1<>(SB), Z2 // Z2 = gather indices, stage 1
   389	VMOVDQU64 expandAVX512_8_outShufLo(SB), Z3 // Z3 = output byte order, shared by both stages
   390	VMOVDQU64 (AX), Z4 // Z4 = 64 input bytes
   391	VPERMB Z4, Z0, Z0 // stage 0: gather input bytes
   392	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // stage 0: apply bit matrices, constant 0
   393	VPERMB Z4, Z2, Z2 // stage 1: gather
   394	VGF2P8AFFINEQB $0, Z1, Z2, Z2 // stage 1: transform
   395	VPERMB Z0, Z3, Z1 // Z1 = Z0 reordered by Z3; the matrices in Z1 are no longer needed
   396	VPERMB Z2, Z3, Z2 // Z2 = Z2 reordered by Z3
   397	RET
   398
   // Constant tables for expandAVX512_10. Each table is 0x40 bytes (one ZMM
   // register) stored as eight little-endian qwords. Unlike the smaller
   // expanders, each of the three stages has its own matrix table
   // (mat0/mat1/mat2).
   // NOTE(review): 0xff index bytes appear to mark unused lanes. VPERMB only
   // looks at the low six index bits, so such a lane still reads a byte —
   // confirm that these lanes are never consumed.
   //
   // inShuf0: stage 0 VPERMB indices. Four qwords name input bytes 0x00-0x06,
   // four name 0x00-0x05.
   399GLOBL expandAVX512_10_inShuf0<>(SB), RODATA, $0x40
   400DATA  expandAVX512_10_inShuf0<>+0x00(SB)/8, $0xff06050403020100
   401DATA  expandAVX512_10_inShuf0<>+0x08(SB)/8, $0xff06050403020100
   402DATA  expandAVX512_10_inShuf0<>+0x10(SB)/8, $0xff06050403020100
   403DATA  expandAVX512_10_inShuf0<>+0x18(SB)/8, $0xff06050403020100
   404DATA  expandAVX512_10_inShuf0<>+0x20(SB)/8, $0xffff050403020100
   405DATA  expandAVX512_10_inShuf0<>+0x28(SB)/8, $0xffff050403020100
   406DATA  expandAVX512_10_inShuf0<>+0x30(SB)/8, $0xffff050403020100
   407DATA  expandAVX512_10_inShuf0<>+0x38(SB)/8, $0xffff050403020100
   408
   // mat0: stage 0 bit matrices for VGF2P8AFFINEQB, one 8x8 matrix per qword,
   // one row per byte. Every row has exactly one bit set.
   409GLOBL expandAVX512_10_mat0<>(SB), RODATA, $0x40
   410DATA  expandAVX512_10_mat0<>+0x00(SB)/8, $0x0101010101010101
   411DATA  expandAVX512_10_mat0<>+0x08(SB)/8, $0x0101020202020202
   412DATA  expandAVX512_10_mat0<>+0x10(SB)/8, $0x0202020204040404
   413DATA  expandAVX512_10_mat0<>+0x18(SB)/8, $0x0404040404040808
   414DATA  expandAVX512_10_mat0<>+0x20(SB)/8, $0x0808080808080808
   415DATA  expandAVX512_10_mat0<>+0x28(SB)/8, $0x1010101010101010
   416DATA  expandAVX512_10_mat0<>+0x30(SB)/8, $0x1010202020202020
   417DATA  expandAVX512_10_mat0<>+0x38(SB)/8, $0x2020202040404040
   418
   // inShuf1: stage 1 VPERMB indices. Two qwords name input bytes 0x00-0x05,
   // four name 0x06-0x0c, two name 0x06-0x0b.
   419GLOBL expandAVX512_10_inShuf1<>(SB), RODATA, $0x40
   420DATA  expandAVX512_10_inShuf1<>+0x00(SB)/8, $0xffff050403020100
   421DATA  expandAVX512_10_inShuf1<>+0x08(SB)/8, $0xffff050403020100
   422DATA  expandAVX512_10_inShuf1<>+0x10(SB)/8, $0xff0c0b0a09080706
   423DATA  expandAVX512_10_inShuf1<>+0x18(SB)/8, $0xff0c0b0a09080706
   424DATA  expandAVX512_10_inShuf1<>+0x20(SB)/8, $0xff0c0b0a09080706
   425DATA  expandAVX512_10_inShuf1<>+0x28(SB)/8, $0xff0c0b0a09080706
   426DATA  expandAVX512_10_inShuf1<>+0x30(SB)/8, $0xffff0b0a09080706
   427DATA  expandAVX512_10_inShuf1<>+0x38(SB)/8, $0xffff0b0a09080706
   428
   // mat1: stage 1 bit matrices; every row has exactly one bit set.
   429GLOBL expandAVX512_10_mat1<>(SB), RODATA, $0x40
   430DATA  expandAVX512_10_mat1<>+0x00(SB)/8, $0x4040404040408080
   431DATA  expandAVX512_10_mat1<>+0x08(SB)/8, $0x8080808080808080
   432DATA  expandAVX512_10_mat1<>+0x10(SB)/8, $0x0808080808080808
   433DATA  expandAVX512_10_mat1<>+0x18(SB)/8, $0x1010101010101010
   434DATA  expandAVX512_10_mat1<>+0x20(SB)/8, $0x1010202020202020
   435DATA  expandAVX512_10_mat1<>+0x28(SB)/8, $0x2020202040404040
   436DATA  expandAVX512_10_mat1<>+0x30(SB)/8, $0x4040404040408080
   437DATA  expandAVX512_10_mat1<>+0x38(SB)/8, $0x8080808080808080
   438
   // inShuf2: stage 2 VPERMB indices. Four qwords name input bytes 0x07-0x0c;
   // the remaining four qwords are all 0xff.
   439GLOBL expandAVX512_10_inShuf2<>(SB), RODATA, $0x40
   440DATA  expandAVX512_10_inShuf2<>+0x00(SB)/8, $0xffff0c0b0a090807
   441DATA  expandAVX512_10_inShuf2<>+0x08(SB)/8, $0xffff0c0b0a090807
   442DATA  expandAVX512_10_inShuf2<>+0x10(SB)/8, $0xffff0c0b0a090807
   443DATA  expandAVX512_10_inShuf2<>+0x18(SB)/8, $0xffff0c0b0a090807
   444DATA  expandAVX512_10_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
   445DATA  expandAVX512_10_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
   446DATA  expandAVX512_10_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
   447DATA  expandAVX512_10_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
   448
   // mat2: stage 2 bit matrices. The first four qwords equal the first four of
   // mat0; the last four are zero matrices.
   449GLOBL expandAVX512_10_mat2<>(SB), RODATA, $0x40
   450DATA  expandAVX512_10_mat2<>+0x00(SB)/8, $0x0101010101010101
   451DATA  expandAVX512_10_mat2<>+0x08(SB)/8, $0x0101020202020202
   452DATA  expandAVX512_10_mat2<>+0x10(SB)/8, $0x0202020204040404
   453DATA  expandAVX512_10_mat2<>+0x18(SB)/8, $0x0404040404040808
   454DATA  expandAVX512_10_mat2<>+0x20(SB)/8, $0x0000000000000000
   455DATA  expandAVX512_10_mat2<>+0x28(SB)/8, $0x0000000000000000
   456DATA  expandAVX512_10_mat2<>+0x30(SB)/8, $0x0000000000000000
   457DATA  expandAVX512_10_mat2<>+0x38(SB)/8, $0x0000000000000000
   458
   // outShufLo / outShufHi: VPERMI2B byte indices for the two output
   // registers. Indices 0x00-0x3f select from the first table register and
   // 0x40-0x7f from the second, which is why values above 0x3f appear here.
   // NOTE(review): neither symbol carries the `<>` file-local suffix used by
   // the other tables — confirm that is what the generator intends.
   459GLOBL expandAVX512_10_outShufLo(SB), RODATA, $0x40
   460DATA  expandAVX512_10_outShufLo+0x00(SB)/8, $0x3830282018100800
   461DATA  expandAVX512_10_outShufLo+0x08(SB)/8, $0x2921191109014840
   462DATA  expandAVX512_10_outShufLo+0x10(SB)/8, $0x1a120a0249413931
   463DATA  expandAVX512_10_outShufLo+0x18(SB)/8, $0x0b034a423a322a22
   464DATA  expandAVX512_10_outShufLo+0x20(SB)/8, $0x4b433b332b231b13
   465DATA  expandAVX512_10_outShufLo+0x28(SB)/8, $0x3c342c241c140c04
   466DATA  expandAVX512_10_outShufLo+0x30(SB)/8, $0x2d251d150d054c44
   467DATA  expandAVX512_10_outShufLo+0x38(SB)/8, $0x1e160e064d453d35
   468
   469GLOBL expandAVX512_10_outShufHi(SB), RODATA, $0x40
   470DATA  expandAVX512_10_outShufHi+0x00(SB)/8, $0x4840383028201810
   471DATA  expandAVX512_10_outShufHi+0x08(SB)/8, $0x3931292119115850
   472DATA  expandAVX512_10_outShufHi+0x10(SB)/8, $0x2a221a1259514941
   473DATA  expandAVX512_10_outShufHi+0x18(SB)/8, $0x1b135a524a423a32
   474DATA  expandAVX512_10_outShufHi+0x20(SB)/8, $0x5b534b433b332b23
   475DATA  expandAVX512_10_outShufHi+0x28(SB)/8, $0x4c443c342c241c14
   476DATA  expandAVX512_10_outShufHi+0x30(SB)/8, $0x3d352d251d155c54
   477DATA  expandAVX512_10_outShufHi+0x38(SB)/8, $0x2e261e165d554d45
   478
   // expandAVX512_10 reads 64 bytes at (AX), runs three gather + bit-matrix
   // stages over them (each stage with its own matrix table, read directly
   // from memory), and merges the stage results into two 64-byte outputs with
   // two-table byte permutes (VPERMI2B).
   // Go assembler operand order is the reverse of Intel's: destination last.
   // NOSPLIT, $0-0: no stack-split check, zero-byte frame, no stack arguments.
   // In:       AX = address of the input buffer.
   // Out:      Z1 = bytes drawn from stage 0 and stage 1,
   //           Z2 = bytes drawn from stage 1 and stage 2.
   // Clobbers: Z0, Z3, Z4, Z5.
   // NOTE(review): presumably a 10x bitmap expansion — confirm against the
   // generator/caller.
   479TEXT expandAVX512_10<>(SB), NOSPLIT, $0-0
   480	VMOVDQU64 expandAVX512_10_inShuf0<>(SB), Z0 // Z0 = gather indices, stage 0
   481	VMOVDQU64 expandAVX512_10_inShuf1<>(SB), Z3 // Z3 = gather indices, stage 1
   482	VMOVDQU64 expandAVX512_10_inShuf2<>(SB), Z4 // Z4 = gather indices, stage 2
   483	VMOVDQU64 expandAVX512_10_outShufLo(SB), Z1 // Z1 = merge indices for the first output
   484	VMOVDQU64 expandAVX512_10_outShufHi(SB), Z2 // Z2 = merge indices for the second output
   485	VMOVDQU64 (AX), Z5 // Z5 = 64 input bytes
   486	VPERMB Z5, Z0, Z0 // stage 0: gather input bytes
   487	VGF2P8AFFINEQB $0, expandAVX512_10_mat0<>(SB), Z0, Z0 // stage 0: apply mat0 from memory, constant 0
   488	VPERMB Z5, Z3, Z3 // stage 1: gather
   489	VGF2P8AFFINEQB $0, expandAVX512_10_mat1<>(SB), Z3, Z3 // stage 1: apply mat1
   490	VPERMB Z5, Z4, Z4 // stage 2: gather
   491	VGF2P8AFFINEQB $0, expandAVX512_10_mat2<>(SB), Z4, Z4 // stage 2: apply mat2
   492	VPERMI2B Z3, Z0, Z1 // Z1 = bytes chosen by Z1 from Z0 (idx 0x00-0x3f) and Z3 (idx 0x40-0x7f)
   493	VPERMI2B Z4, Z3, Z2 // Z2 = bytes chosen by Z2 from Z3 (idx 0x00-0x3f) and Z4 (idx 0x40-0x7f)
   494	RET
   495
   // Constant tables for expandAVX512_12. Each table is 0x40 bytes (one ZMM
   // register) stored as eight little-endian qwords. Each of the three stages
   // has its own matrix table (mat0/mat1/mat2).
   // NOTE(review): 0xff index bytes appear to mark unused lanes. VPERMB only
   // looks at the low six index bits, so such a lane still reads a byte —
   // confirm that these lanes are never consumed.
   //
   // inShuf0: stage 0 VPERMB indices. Four qwords name input bytes 0x00-0x05,
   // four name 0x00-0x04.
   496GLOBL expandAVX512_12_inShuf0<>(SB), RODATA, $0x40
   497DATA  expandAVX512_12_inShuf0<>+0x00(SB)/8, $0xffff050403020100
   498DATA  expandAVX512_12_inShuf0<>+0x08(SB)/8, $0xffff050403020100
   499DATA  expandAVX512_12_inShuf0<>+0x10(SB)/8, $0xffff050403020100
   500DATA  expandAVX512_12_inShuf0<>+0x18(SB)/8, $0xffff050403020100
   501DATA  expandAVX512_12_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
   502DATA  expandAVX512_12_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
   503DATA  expandAVX512_12_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
   504DATA  expandAVX512_12_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
   505
   // mat0: stage 0 bit matrices for VGF2P8AFFINEQB, one 8x8 matrix per qword,
   // one row per byte. Every row has exactly one bit set.
   506GLOBL expandAVX512_12_mat0<>(SB), RODATA, $0x40
   507DATA  expandAVX512_12_mat0<>+0x00(SB)/8, $0x0101010101010101
   508DATA  expandAVX512_12_mat0<>+0x08(SB)/8, $0x0101010102020202
   509DATA  expandAVX512_12_mat0<>+0x10(SB)/8, $0x0202020202020202
   510DATA  expandAVX512_12_mat0<>+0x18(SB)/8, $0x0404040404040404
   511DATA  expandAVX512_12_mat0<>+0x20(SB)/8, $0x0404040408080808
   512DATA  expandAVX512_12_mat0<>+0x28(SB)/8, $0x0808080808080808
   513DATA  expandAVX512_12_mat0<>+0x30(SB)/8, $0x1010101010101010
   514DATA  expandAVX512_12_mat0<>+0x38(SB)/8, $0x1010101020202020
   515
   // inShuf1: stage 1 VPERMB indices. Four qwords name input bytes 0x00-0x04,
   // four name 0x05-0x0a.
   516GLOBL expandAVX512_12_inShuf1<>(SB), RODATA, $0x40
   517DATA  expandAVX512_12_inShuf1<>+0x00(SB)/8, $0xffffff0403020100
   518DATA  expandAVX512_12_inShuf1<>+0x08(SB)/8, $0xffffff0403020100
   519DATA  expandAVX512_12_inShuf1<>+0x10(SB)/8, $0xffffff0403020100
   520DATA  expandAVX512_12_inShuf1<>+0x18(SB)/8, $0xffffff0403020100
   521DATA  expandAVX512_12_inShuf1<>+0x20(SB)/8, $0xffff0a0908070605
   522DATA  expandAVX512_12_inShuf1<>+0x28(SB)/8, $0xffff0a0908070605
   523DATA  expandAVX512_12_inShuf1<>+0x30(SB)/8, $0xffff0a0908070605
   524DATA  expandAVX512_12_inShuf1<>+0x38(SB)/8, $0xffff0a0908070605
   525
   // mat1: stage 1 bit matrices; every row has exactly one bit set. Its last
   // four qwords equal the last four of mat0.
   526GLOBL expandAVX512_12_mat1<>(SB), RODATA, $0x40
   527DATA  expandAVX512_12_mat1<>+0x00(SB)/8, $0x2020202020202020
   528DATA  expandAVX512_12_mat1<>+0x08(SB)/8, $0x4040404040404040
   529DATA  expandAVX512_12_mat1<>+0x10(SB)/8, $0x4040404080808080
   530DATA  expandAVX512_12_mat1<>+0x18(SB)/8, $0x8080808080808080
   531DATA  expandAVX512_12_mat1<>+0x20(SB)/8, $0x0404040408080808
   532DATA  expandAVX512_12_mat1<>+0x28(SB)/8, $0x0808080808080808
   533DATA  expandAVX512_12_mat1<>+0x30(SB)/8, $0x1010101010101010
   534DATA  expandAVX512_12_mat1<>+0x38(SB)/8, $0x1010101020202020
   535
   // inShuf2: stage 2 VPERMB indices. Four qwords name input bytes 0x05-0x09,
   // four name 0x06-0x0a.
   536GLOBL expandAVX512_12_inShuf2<>(SB), RODATA, $0x40
   537DATA  expandAVX512_12_inShuf2<>+0x00(SB)/8, $0xffffff0908070605
   538DATA  expandAVX512_12_inShuf2<>+0x08(SB)/8, $0xffffff0908070605
   539DATA  expandAVX512_12_inShuf2<>+0x10(SB)/8, $0xffffff0908070605
   540DATA  expandAVX512_12_inShuf2<>+0x18(SB)/8, $0xffffff0908070605
   541DATA  expandAVX512_12_inShuf2<>+0x20(SB)/8, $0xffffff0a09080706
   542DATA  expandAVX512_12_inShuf2<>+0x28(SB)/8, $0xffffff0a09080706
   543DATA  expandAVX512_12_inShuf2<>+0x30(SB)/8, $0xffffff0a09080706
   544DATA  expandAVX512_12_inShuf2<>+0x38(SB)/8, $0xffffff0a09080706
   545
   // mat2: stage 2 bit matrices; every row has exactly one bit set. Its first
   // four qwords equal the first four of mat1, its last four equal the first
   // four of mat0.
   546GLOBL expandAVX512_12_mat2<>(SB), RODATA, $0x40
   547DATA  expandAVX512_12_mat2<>+0x00(SB)/8, $0x2020202020202020
   548DATA  expandAVX512_12_mat2<>+0x08(SB)/8, $0x4040404040404040
   549DATA  expandAVX512_12_mat2<>+0x10(SB)/8, $0x4040404080808080
   550DATA  expandAVX512_12_mat2<>+0x18(SB)/8, $0x8080808080808080
   551DATA  expandAVX512_12_mat2<>+0x20(SB)/8, $0x0101010101010101
   552DATA  expandAVX512_12_mat2<>+0x28(SB)/8, $0x0101010102020202
   553DATA  expandAVX512_12_mat2<>+0x30(SB)/8, $0x0202020202020202
   554DATA  expandAVX512_12_mat2<>+0x38(SB)/8, $0x0404040404040404
   555
   // outShufLo / outShufHi: VPERMI2B byte indices for the two output
   // registers. Indices 0x00-0x3f select from the first table register and
   // 0x40-0x7f from the second, which is why values above 0x3f appear here.
   // NOTE(review): neither symbol carries the `<>` file-local suffix used by
   // the other tables — confirm that is what the generator intends.
   556GLOBL expandAVX512_12_outShufLo(SB), RODATA, $0x40
   557DATA  expandAVX512_12_outShufLo+0x00(SB)/8, $0x3830282018100800
   558DATA  expandAVX512_12_outShufLo+0x08(SB)/8, $0x1911090158504840
   559DATA  expandAVX512_12_outShufLo+0x10(SB)/8, $0x5951494139312921
   560DATA  expandAVX512_12_outShufLo+0x18(SB)/8, $0x3a322a221a120a02
   561DATA  expandAVX512_12_outShufLo+0x20(SB)/8, $0x1b130b035a524a42
   562DATA  expandAVX512_12_outShufLo+0x28(SB)/8, $0x5b534b433b332b23
   563DATA  expandAVX512_12_outShufLo+0x30(SB)/8, $0x3c342c241c140c04
   564DATA  expandAVX512_12_outShufLo+0x38(SB)/8, $0x1d150d055c544c44
   565
   566GLOBL expandAVX512_12_outShufHi(SB), RODATA, $0x40
   567DATA  expandAVX512_12_outShufHi+0x00(SB)/8, $0x5850484038302820
   568DATA  expandAVX512_12_outShufHi+0x08(SB)/8, $0x3931292178706860
   569DATA  expandAVX512_12_outShufHi+0x10(SB)/8, $0x7971696159514941
   570DATA  expandAVX512_12_outShufHi+0x18(SB)/8, $0x5a524a423a322a22
   571DATA  expandAVX512_12_outShufHi+0x20(SB)/8, $0x3b332b237a726a62
   572DATA  expandAVX512_12_outShufHi+0x28(SB)/8, $0x7b736b635b534b43
   573DATA  expandAVX512_12_outShufHi+0x30(SB)/8, $0x5c544c443c342c24
   574DATA  expandAVX512_12_outShufHi+0x38(SB)/8, $0x3d352d257c746c64
   575
   // expandAVX512_12 reads 64 bytes at (AX), runs three gather + bit-matrix
   // stages over them (each stage with its own matrix table, read directly
   // from memory), and merges the stage results into two 64-byte outputs with
   // two-table byte permutes (VPERMI2B).
   // Go assembler operand order is the reverse of Intel's: destination last.
   // NOSPLIT, $0-0: no stack-split check, zero-byte frame, no stack arguments.
   // In:       AX = address of the input buffer.
   // Out:      Z1 = bytes drawn from stage 0 and stage 1,
   //           Z2 = bytes drawn from stage 1 and stage 2.
   // Clobbers: Z0, Z3, Z4, Z5.
   // NOTE(review): presumably a 12x bitmap expansion — confirm against the
   // generator/caller.
   576TEXT expandAVX512_12<>(SB), NOSPLIT, $0-0
   577	VMOVDQU64 expandAVX512_12_inShuf0<>(SB), Z0 // Z0 = gather indices, stage 0
   578	VMOVDQU64 expandAVX512_12_inShuf1<>(SB), Z3 // Z3 = gather indices, stage 1
   579	VMOVDQU64 expandAVX512_12_inShuf2<>(SB), Z4 // Z4 = gather indices, stage 2
   580	VMOVDQU64 expandAVX512_12_outShufLo(SB), Z1 // Z1 = merge indices for the first output
   581	VMOVDQU64 expandAVX512_12_outShufHi(SB), Z2 // Z2 = merge indices for the second output
   582	VMOVDQU64 (AX), Z5 // Z5 = 64 input bytes
   583	VPERMB Z5, Z0, Z0 // stage 0: gather input bytes
   584	VGF2P8AFFINEQB $0, expandAVX512_12_mat0<>(SB), Z0, Z0 // stage 0: apply mat0 from memory, constant 0
   585	VPERMB Z5, Z3, Z3 // stage 1: gather
   586	VGF2P8AFFINEQB $0, expandAVX512_12_mat1<>(SB), Z3, Z3 // stage 1: apply mat1
   587	VPERMB Z5, Z4, Z4 // stage 2: gather
   588	VGF2P8AFFINEQB $0, expandAVX512_12_mat2<>(SB), Z4, Z4 // stage 2: apply mat2
   589	VPERMI2B Z3, Z0, Z1 // Z1 = bytes chosen by Z1 from Z0 (idx 0x00-0x3f) and Z3 (idx 0x40-0x7f)
   590	VPERMI2B Z4, Z3, Z2 // Z2 = bytes chosen by Z2 from Z3 (idx 0x00-0x3f) and Z4 (idx 0x40-0x7f)
   591	RET
   592
   // expandAVX512_14_inShuf0: byte indices loaded into Z0 by expandAVX512_14
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   593GLOBL expandAVX512_14_inShuf0<>(SB), RODATA, $0x40
   594DATA  expandAVX512_14_inShuf0<>+0x00(SB)/8, $0xffffff0403020100
   595DATA  expandAVX512_14_inShuf0<>+0x08(SB)/8, $0xffffff0403020100
   596DATA  expandAVX512_14_inShuf0<>+0x10(SB)/8, $0xffffff0403020100
   597DATA  expandAVX512_14_inShuf0<>+0x18(SB)/8, $0xffffff0403020100
   598DATA  expandAVX512_14_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
   599DATA  expandAVX512_14_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
   600DATA  expandAVX512_14_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
   601DATA  expandAVX512_14_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
   602
   // expandAVX512_14_mat0: memory operand of the first VGF2P8AFFINEQB in
   // expandAVX512_14 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf0-permuted input bytes.
   603GLOBL expandAVX512_14_mat0<>(SB), RODATA, $0x40
   604DATA  expandAVX512_14_mat0<>+0x00(SB)/8, $0x0101010101010101
   605DATA  expandAVX512_14_mat0<>+0x08(SB)/8, $0x0101010101010202
   606DATA  expandAVX512_14_mat0<>+0x10(SB)/8, $0x0202020202020202
   607DATA  expandAVX512_14_mat0<>+0x18(SB)/8, $0x0202020204040404
   608DATA  expandAVX512_14_mat0<>+0x20(SB)/8, $0x0404040404040404
   609DATA  expandAVX512_14_mat0<>+0x28(SB)/8, $0x0404080808080808
   610DATA  expandAVX512_14_mat0<>+0x30(SB)/8, $0x0808080808080808
   611DATA  expandAVX512_14_mat0<>+0x38(SB)/8, $0x1010101010101010
   612
   // expandAVX512_14_inShuf1: byte indices loaded into Z2 by expandAVX512_14
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   613GLOBL expandAVX512_14_inShuf1<>(SB), RODATA, $0x40
   614DATA  expandAVX512_14_inShuf1<>+0x00(SB)/8, $0xffffffff03020100
   615DATA  expandAVX512_14_inShuf1<>+0x08(SB)/8, $0xffffffff03020100
   616DATA  expandAVX512_14_inShuf1<>+0x10(SB)/8, $0xffffffff03020100
   617DATA  expandAVX512_14_inShuf1<>+0x18(SB)/8, $0xffffffff03020100
   618DATA  expandAVX512_14_inShuf1<>+0x20(SB)/8, $0xffffffff03020100
   619DATA  expandAVX512_14_inShuf1<>+0x28(SB)/8, $0xffffffff03020100
   620DATA  expandAVX512_14_inShuf1<>+0x30(SB)/8, $0xffffff0807060504
   621DATA  expandAVX512_14_inShuf1<>+0x38(SB)/8, $0xffffff0807060504
   622
   // expandAVX512_14_mat1: memory operand of the second VGF2P8AFFINEQB in
   // expandAVX512_14 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf1-permuted input bytes.
   623GLOBL expandAVX512_14_mat1<>(SB), RODATA, $0x40
   624DATA  expandAVX512_14_mat1<>+0x00(SB)/8, $0x1010101010102020
   625DATA  expandAVX512_14_mat1<>+0x08(SB)/8, $0x2020202020202020
   626DATA  expandAVX512_14_mat1<>+0x10(SB)/8, $0x2020202040404040
   627DATA  expandAVX512_14_mat1<>+0x18(SB)/8, $0x4040404040404040
   628DATA  expandAVX512_14_mat1<>+0x20(SB)/8, $0x4040808080808080
   629DATA  expandAVX512_14_mat1<>+0x28(SB)/8, $0x8080808080808080
   630DATA  expandAVX512_14_mat1<>+0x30(SB)/8, $0x1010101010102020
   631DATA  expandAVX512_14_mat1<>+0x38(SB)/8, $0x2020202020202020
   632
   // expandAVX512_14_inShuf2: byte indices loaded into Z3 by expandAVX512_14
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   633GLOBL expandAVX512_14_inShuf2<>(SB), RODATA, $0x40
   634DATA  expandAVX512_14_inShuf2<>+0x00(SB)/8, $0xffffff0807060504
   635DATA  expandAVX512_14_inShuf2<>+0x08(SB)/8, $0xffffff0807060504
   636DATA  expandAVX512_14_inShuf2<>+0x10(SB)/8, $0xffffff0807060504
   637DATA  expandAVX512_14_inShuf2<>+0x18(SB)/8, $0xffffff0807060504
   638DATA  expandAVX512_14_inShuf2<>+0x20(SB)/8, $0xffffff0908070605
   639DATA  expandAVX512_14_inShuf2<>+0x28(SB)/8, $0xffffff0908070605
   640DATA  expandAVX512_14_inShuf2<>+0x30(SB)/8, $0xffffffff08070605
   641DATA  expandAVX512_14_inShuf2<>+0x38(SB)/8, $0xffffffff08070605
   642
   // expandAVX512_14_mat2: memory operand of the third VGF2P8AFFINEQB in
   // expandAVX512_14 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf2-permuted input bytes.
   643GLOBL expandAVX512_14_mat2<>(SB), RODATA, $0x40
   644DATA  expandAVX512_14_mat2<>+0x00(SB)/8, $0x2020202040404040
   645DATA  expandAVX512_14_mat2<>+0x08(SB)/8, $0x4040404040404040
   646DATA  expandAVX512_14_mat2<>+0x10(SB)/8, $0x4040808080808080
   647DATA  expandAVX512_14_mat2<>+0x18(SB)/8, $0x8080808080808080
   648DATA  expandAVX512_14_mat2<>+0x20(SB)/8, $0x0101010101010101
   649DATA  expandAVX512_14_mat2<>+0x28(SB)/8, $0x0101010101010202
   650DATA  expandAVX512_14_mat2<>+0x30(SB)/8, $0x0202020202020202
   651DATA  expandAVX512_14_mat2<>+0x38(SB)/8, $0x0202020204040404
   652
   // expandAVX512_14_inShuf3: byte indices loaded into Z4 by expandAVX512_14
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   // The upper four lanes are all 0xff; the matching lanes of
   // expandAVX512_14_mat3 are zero matrices.
   653GLOBL expandAVX512_14_inShuf3<>(SB), RODATA, $0x40
   654DATA  expandAVX512_14_inShuf3<>+0x00(SB)/8, $0xffffffff08070605
   655DATA  expandAVX512_14_inShuf3<>+0x08(SB)/8, $0xffffffff08070605
   656DATA  expandAVX512_14_inShuf3<>+0x10(SB)/8, $0xffffffff08070605
   657DATA  expandAVX512_14_inShuf3<>+0x18(SB)/8, $0xffffffff08070605
   658DATA  expandAVX512_14_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
   659DATA  expandAVX512_14_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
   660DATA  expandAVX512_14_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
   661DATA  expandAVX512_14_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
   662
   // expandAVX512_14_mat3: memory operand of the fourth VGF2P8AFFINEQB in
   // expandAVX512_14 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf3-permuted input bytes. The upper four lanes are zero matrices, so
   // with the $0 constant those lanes produce zero bytes.
   663GLOBL expandAVX512_14_mat3<>(SB), RODATA, $0x40
   664DATA  expandAVX512_14_mat3<>+0x00(SB)/8, $0x0404040404040404
   665DATA  expandAVX512_14_mat3<>+0x08(SB)/8, $0x0404080808080808
   666DATA  expandAVX512_14_mat3<>+0x10(SB)/8, $0x0808080808080808
   667DATA  expandAVX512_14_mat3<>+0x18(SB)/8, $0x1010101010101010
   668DATA  expandAVX512_14_mat3<>+0x20(SB)/8, $0x0000000000000000
   669DATA  expandAVX512_14_mat3<>+0x28(SB)/8, $0x0000000000000000
   670DATA  expandAVX512_14_mat3<>+0x30(SB)/8, $0x0000000000000000
   671DATA  expandAVX512_14_mat3<>+0x38(SB)/8, $0x0000000000000000
   672
   // expandAVX512_14_outShufLo: byte indices for the VPERMI2B in
   // expandAVX512_14 that assembles Z1 from the first two transformed vectors.
   673GLOBL expandAVX512_14_outShufLo(SB), RODATA, $0x40
   674DATA  expandAVX512_14_outShufLo+0x00(SB)/8, $0x3830282018100800
   675DATA  expandAVX512_14_outShufLo+0x08(SB)/8, $0x0901686058504840
   676DATA  expandAVX512_14_outShufLo+0x10(SB)/8, $0x4941393129211911
   677DATA  expandAVX512_14_outShufLo+0x18(SB)/8, $0x1a120a0269615951
   678DATA  expandAVX512_14_outShufLo+0x20(SB)/8, $0x5a524a423a322a22
   679DATA  expandAVX512_14_outShufLo+0x28(SB)/8, $0x2b231b130b036a62
   680DATA  expandAVX512_14_outShufLo+0x30(SB)/8, $0x6b635b534b433b33
   681DATA  expandAVX512_14_outShufLo+0x38(SB)/8, $0x3c342c241c140c04
   682
   // expandAVX512_14_outShufHi0: byte indices for the zero-masked VPERMI2B.Z
   // in expandAVX512_14 that builds part of Z2 from the second and third
   // transformed vectors. The 0xff entries sit in byte lanes that the K1 mask
   // 0xff0ffc3ff0ffc3ff clears.
   683GLOBL expandAVX512_14_outShufHi0(SB), RODATA, $0x40
   684DATA  expandAVX512_14_outShufHi0+0x00(SB)/8, $0x6860585048403830
   685DATA  expandAVX512_14_outShufHi0+0x08(SB)/8, $0x3931ffffffff7870
   686DATA  expandAVX512_14_outShufHi0+0x10(SB)/8, $0x7971696159514941
   687DATA  expandAVX512_14_outShufHi0+0x18(SB)/8, $0x4a423a32ffffffff
   688DATA  expandAVX512_14_outShufHi0+0x20(SB)/8, $0xffff7a726a625a52
   689DATA  expandAVX512_14_outShufHi0+0x28(SB)/8, $0x5b534b433b33ffff
   690DATA  expandAVX512_14_outShufHi0+0x30(SB)/8, $0xffffffff7b736b63
   691DATA  expandAVX512_14_outShufHi0+0x38(SB)/8, $0x6c645c544c443c34
   692
   // expandAVX512_14_outShufHi1: byte indices for the zero-masked VPERMB.Z in
   // expandAVX512_14 that takes the remaining bytes of Z2 from the fourth
   // transformed vector. The non-0xff entries sit in the byte lanes selected
   // by the K1 mask 0xf003c00f003c00.
   693GLOBL expandAVX512_14_outShufHi1(SB), RODATA, $0x40
   694DATA  expandAVX512_14_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
   695DATA  expandAVX512_14_outShufHi1+0x08(SB)/8, $0xffff18100800ffff
   696DATA  expandAVX512_14_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
   697DATA  expandAVX512_14_outShufHi1+0x18(SB)/8, $0xffffffff19110901
   698DATA  expandAVX512_14_outShufHi1+0x20(SB)/8, $0x0a02ffffffffffff
   699DATA  expandAVX512_14_outShufHi1+0x28(SB)/8, $0xffffffffffff1a12
   700DATA  expandAVX512_14_outShufHi1+0x30(SB)/8, $0x1b130b03ffffffff
   701DATA  expandAVX512_14_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
   702
   // expandAVX512_14 loads 64 bytes from (AX), derives four intermediate
   // vectors (VPERMB byte shuffle followed by a VGF2P8AFFINEQB bit-matrix
   // transform with constant 0) and combines them into Z1 and Z2.
   // Z2 is built in two zero-masked pieces under complementary K1 masks,
   // which are then merged with VPORQ.
   //
   // In:  AX = address of the 64-byte input block.
   // Out: Z1 and Z2 are the result registers as far as this body shows;
   //      presumably the low and high halves of the expanded result
   //      (confirm against the caller).
   // Overwrites Z0-Z7, K1 and AX (AX is reused for the mask constants after
   // the input has been loaded). NOSPLIT, zero-size frame, no arguments.
   703TEXT expandAVX512_14<>(SB), NOSPLIT, $0-0
   704	VMOVDQU64 expandAVX512_14_inShuf0<>(SB), Z0 // Z0, Z2, Z3, Z4 = input shuffle indices
   705	VMOVDQU64 expandAVX512_14_inShuf1<>(SB), Z2
   706	VMOVDQU64 expandAVX512_14_inShuf2<>(SB), Z3
   707	VMOVDQU64 expandAVX512_14_inShuf3<>(SB), Z4
   708	VMOVDQU64 expandAVX512_14_outShufLo(SB), Z1 // Z1, Z5, Z6 = output shuffle indices
   709	VMOVDQU64 expandAVX512_14_outShufHi0(SB), Z5
   710	VMOVDQU64 expandAVX512_14_outShufHi1(SB), Z6
   711	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes
   712	VPERMB Z7, Z0, Z0 // Z0 = input bytes selected by inShuf0
   713	VGF2P8AFFINEQB $0, expandAVX512_14_mat0<>(SB), Z0, Z0 // per-byte bit-matrix transform, constant 0
   714	VPERMB Z7, Z2, Z2 // Z2 = input bytes selected by inShuf1
   715	VGF2P8AFFINEQB $0, expandAVX512_14_mat1<>(SB), Z2, Z2
   716	VPERMB Z7, Z3, Z3 // Z3 = input bytes selected by inShuf2
   717	VGF2P8AFFINEQB $0, expandAVX512_14_mat2<>(SB), Z3, Z3
   718	VPERMB Z7, Z4, Z4 // Z4 = input bytes selected by inShuf3
   719	VGF2P8AFFINEQB $0, expandAVX512_14_mat3<>(SB), Z4, Z4
   720	VPERMI2B Z2, Z0, Z1 // Z1 = bytes gathered from the Z0/Z2 pair per outShufLo
   721	MOVQ $0xff0ffc3ff0ffc3ff, AX // byte lanes taken from the Z2/Z3 pair
   722	KMOVQ AX, K1
   723	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = masked gather per outShufHi0; other lanes zeroed
   724	MOVQ $0xf003c00f003c00, AX // bitwise complement of the previous mask
   725	KMOVQ AX, K1
   726	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = masked bytes of Z4 per outShufHi1; other lanes zeroed
   727	VPORQ Z0, Z5, Z2 // Z2 = union of the two disjoint pieces
   728	RET
   729
   // expandAVX512_16_inShuf0: byte indices loaded into Z0 by expandAVX512_16
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   730GLOBL expandAVX512_16_inShuf0<>(SB), RODATA, $0x40
   731DATA  expandAVX512_16_inShuf0<>+0x00(SB)/8, $0x0303020201010000
   732DATA  expandAVX512_16_inShuf0<>+0x08(SB)/8, $0x0303020201010000
   733DATA  expandAVX512_16_inShuf0<>+0x10(SB)/8, $0x0303020201010000
   734DATA  expandAVX512_16_inShuf0<>+0x18(SB)/8, $0x0303020201010000
   735DATA  expandAVX512_16_inShuf0<>+0x20(SB)/8, $0x0303020201010000
   736DATA  expandAVX512_16_inShuf0<>+0x28(SB)/8, $0x0303020201010000
   737DATA  expandAVX512_16_inShuf0<>+0x30(SB)/8, $0x0303020201010000
   738DATA  expandAVX512_16_inShuf0<>+0x38(SB)/8, $0x0303020201010000
   739
   // expandAVX512_16_mat0: bit matrices (one 8x8 matrix per 64-bit lane) that
   // expandAVX512_16 loads into Z1 and uses as the matrix operand of both of
   // its VGF2P8AFFINEQB instructions.
   740GLOBL expandAVX512_16_mat0<>(SB), RODATA, $0x40
   741DATA  expandAVX512_16_mat0<>+0x00(SB)/8, $0x0101010101010101
   742DATA  expandAVX512_16_mat0<>+0x08(SB)/8, $0x0202020202020202
   743DATA  expandAVX512_16_mat0<>+0x10(SB)/8, $0x0404040404040404
   744DATA  expandAVX512_16_mat0<>+0x18(SB)/8, $0x0808080808080808
   745DATA  expandAVX512_16_mat0<>+0x20(SB)/8, $0x1010101010101010
   746DATA  expandAVX512_16_mat0<>+0x28(SB)/8, $0x2020202020202020
   747DATA  expandAVX512_16_mat0<>+0x30(SB)/8, $0x4040404040404040
   748DATA  expandAVX512_16_mat0<>+0x38(SB)/8, $0x8080808080808080
   749
   // expandAVX512_16_inShuf1: byte indices loaded into Z2 by expandAVX512_16
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   750GLOBL expandAVX512_16_inShuf1<>(SB), RODATA, $0x40
   751DATA  expandAVX512_16_inShuf1<>+0x00(SB)/8, $0x0707060605050404
   752DATA  expandAVX512_16_inShuf1<>+0x08(SB)/8, $0x0707060605050404
   753DATA  expandAVX512_16_inShuf1<>+0x10(SB)/8, $0x0707060605050404
   754DATA  expandAVX512_16_inShuf1<>+0x18(SB)/8, $0x0707060605050404
   755DATA  expandAVX512_16_inShuf1<>+0x20(SB)/8, $0x0707060605050404
   756DATA  expandAVX512_16_inShuf1<>+0x28(SB)/8, $0x0707060605050404
   757DATA  expandAVX512_16_inShuf1<>+0x30(SB)/8, $0x0707060605050404
   758DATA  expandAVX512_16_inShuf1<>+0x38(SB)/8, $0x0707060605050404
   759
   // expandAVX512_16_outShufLo: byte indices loaded into Z3 by expandAVX512_16
   // and used by both of its final VPERMB instructions (for Z1 and for Z2).
   760GLOBL expandAVX512_16_outShufLo(SB), RODATA, $0x40
   761DATA  expandAVX512_16_outShufLo+0x00(SB)/8, $0x1918111009080100
   762DATA  expandAVX512_16_outShufLo+0x08(SB)/8, $0x3938313029282120
   763DATA  expandAVX512_16_outShufLo+0x10(SB)/8, $0x1b1a13120b0a0302
   764DATA  expandAVX512_16_outShufLo+0x18(SB)/8, $0x3b3a33322b2a2322
   765DATA  expandAVX512_16_outShufLo+0x20(SB)/8, $0x1d1c15140d0c0504
   766DATA  expandAVX512_16_outShufLo+0x28(SB)/8, $0x3d3c35342d2c2524
   767DATA  expandAVX512_16_outShufLo+0x30(SB)/8, $0x1f1e17160f0e0706
   768DATA  expandAVX512_16_outShufLo+0x38(SB)/8, $0x3f3e37362f2e2726
   769
   // expandAVX512_16 loads 64 bytes from (AX), derives two intermediate
   // vectors (VPERMB byte shuffle followed by a VGF2P8AFFINEQB bit-matrix
   // transform with constant 0, both using the same matrix) and reorders each
   // with the same output shuffle into Z1 and Z2.
   //
   // In:  AX = address of the 64-byte input block.
   // Out: Z1 and Z2 are the last registers written; presumably the low and
   //      high halves of the expanded result (confirm against the caller).
   // Overwrites Z0-Z4. NOSPLIT, zero-size frame, no arguments.
   770TEXT expandAVX512_16<>(SB), NOSPLIT, $0-0
   771	VMOVDQU64 expandAVX512_16_inShuf0<>(SB), Z0 // Z0, Z2 = input shuffle indices
   772	VMOVDQU64 expandAVX512_16_mat0<>(SB), Z1 // Z1 = bit matrices shared by both transforms
   773	VMOVDQU64 expandAVX512_16_inShuf1<>(SB), Z2
   774	VMOVDQU64 expandAVX512_16_outShufLo(SB), Z3 // Z3 = output shuffle indices
   775	VMOVDQU64 (AX), Z4 // Z4 = 64 input bytes
   776	VPERMB Z4, Z0, Z0 // Z0 = input bytes selected by inShuf0
   777	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // per-byte bit-matrix transform, constant 0
   778	VPERMB Z4, Z2, Z2 // Z2 = input bytes selected by inShuf1
   779	VGF2P8AFFINEQB $0, Z1, Z2, Z2
   780	VPERMB Z0, Z3, Z1 // Z1 = Z0 reordered per outShufLo (the matrix in Z1 is no longer needed)
   781	VPERMB Z2, Z3, Z2 // Z2 = Z2 reordered per the same indices
   782	RET
   783
   // expandAVX512_18_inShuf0: byte indices loaded into Z0 by expandAVX512_18
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   784GLOBL expandAVX512_18_inShuf0<>(SB), RODATA, $0x40
   785DATA  expandAVX512_18_inShuf0<>+0x00(SB)/8, $0x0303020201010000
   786DATA  expandAVX512_18_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
   787DATA  expandAVX512_18_inShuf0<>+0x10(SB)/8, $0xffffffff03020100
   788DATA  expandAVX512_18_inShuf0<>+0x18(SB)/8, $0xffffffff03020100
   789DATA  expandAVX512_18_inShuf0<>+0x20(SB)/8, $0xffffffff03020100
   790DATA  expandAVX512_18_inShuf0<>+0x28(SB)/8, $0xffffffff03020100
   791DATA  expandAVX512_18_inShuf0<>+0x30(SB)/8, $0x0303020201010000
   792DATA  expandAVX512_18_inShuf0<>+0x38(SB)/8, $0xff03020201010000
   793
   // expandAVX512_18_mat0: memory operand of the first VGF2P8AFFINEQB in
   // expandAVX512_18 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf0-permuted input bytes.
   794GLOBL expandAVX512_18_mat0<>(SB), RODATA, $0x40
   795DATA  expandAVX512_18_mat0<>+0x00(SB)/8, $0x0101010101010101
   796DATA  expandAVX512_18_mat0<>+0x08(SB)/8, $0x0101020202020202
   797DATA  expandAVX512_18_mat0<>+0x10(SB)/8, $0x0202020202020202
   798DATA  expandAVX512_18_mat0<>+0x18(SB)/8, $0x0202020204040404
   799DATA  expandAVX512_18_mat0<>+0x20(SB)/8, $0x0404040404040404
   800DATA  expandAVX512_18_mat0<>+0x28(SB)/8, $0x0404040404040808
   801DATA  expandAVX512_18_mat0<>+0x30(SB)/8, $0x0808080808080808
   802DATA  expandAVX512_18_mat0<>+0x38(SB)/8, $0x1010101010101010
   803
   // expandAVX512_18_inShuf1: byte indices loaded into Z2 by expandAVX512_18
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   804GLOBL expandAVX512_18_inShuf1<>(SB), RODATA, $0x40
   805DATA  expandAVX512_18_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
   806DATA  expandAVX512_18_inShuf1<>+0x08(SB)/8, $0xffffffffff020100
   807DATA  expandAVX512_18_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
   808DATA  expandAVX512_18_inShuf1<>+0x18(SB)/8, $0xffffffffff020100
   809DATA  expandAVX512_18_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
   810DATA  expandAVX512_18_inShuf1<>+0x28(SB)/8, $0xffff020201010000
   811DATA  expandAVX512_18_inShuf1<>+0x30(SB)/8, $0xff06060505040403
   812DATA  expandAVX512_18_inShuf1<>+0x38(SB)/8, $0xffffffff06050403
   813
   // expandAVX512_18_mat1: memory operand of the second VGF2P8AFFINEQB in
   // expandAVX512_18 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf1-permuted input bytes.
   814GLOBL expandAVX512_18_mat1<>(SB), RODATA, $0x40
   815DATA  expandAVX512_18_mat1<>+0x00(SB)/8, $0x1010202020202020
   816DATA  expandAVX512_18_mat1<>+0x08(SB)/8, $0x2020202020202020
   817DATA  expandAVX512_18_mat1<>+0x10(SB)/8, $0x2020202040404040
   818DATA  expandAVX512_18_mat1<>+0x18(SB)/8, $0x4040404040404040
   819DATA  expandAVX512_18_mat1<>+0x20(SB)/8, $0x4040404040408080
   820DATA  expandAVX512_18_mat1<>+0x28(SB)/8, $0x8080808080808080
   821DATA  expandAVX512_18_mat1<>+0x30(SB)/8, $0x1010101010101010
   822DATA  expandAVX512_18_mat1<>+0x38(SB)/8, $0x1010202020202020
   823
   // expandAVX512_18_inShuf2: byte indices loaded into Z3 by expandAVX512_18
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   824GLOBL expandAVX512_18_inShuf2<>(SB), RODATA, $0x40
   825DATA  expandAVX512_18_inShuf2<>+0x00(SB)/8, $0xffffffff06050403
   826DATA  expandAVX512_18_inShuf2<>+0x08(SB)/8, $0xffffffff06050403
   827DATA  expandAVX512_18_inShuf2<>+0x10(SB)/8, $0xffffffff06050403
   828DATA  expandAVX512_18_inShuf2<>+0x18(SB)/8, $0xffffffff06050403
   829DATA  expandAVX512_18_inShuf2<>+0x20(SB)/8, $0x0606050504040303
   830DATA  expandAVX512_18_inShuf2<>+0x28(SB)/8, $0x0707060605050404
   831DATA  expandAVX512_18_inShuf2<>+0x30(SB)/8, $0xffffffffff060504
   832DATA  expandAVX512_18_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
   833
   // expandAVX512_18_mat2: memory operand of the third VGF2P8AFFINEQB in
   // expandAVX512_18 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf2-permuted input bytes.
   834GLOBL expandAVX512_18_mat2<>(SB), RODATA, $0x40
   835DATA  expandAVX512_18_mat2<>+0x00(SB)/8, $0x2020202020202020
   836DATA  expandAVX512_18_mat2<>+0x08(SB)/8, $0x2020202040404040
   837DATA  expandAVX512_18_mat2<>+0x10(SB)/8, $0x4040404040404040
   838DATA  expandAVX512_18_mat2<>+0x18(SB)/8, $0x4040404040408080
   839DATA  expandAVX512_18_mat2<>+0x20(SB)/8, $0x8080808080808080
   840DATA  expandAVX512_18_mat2<>+0x28(SB)/8, $0x0101010101010101
   841DATA  expandAVX512_18_mat2<>+0x30(SB)/8, $0x0101020202020202
   842DATA  expandAVX512_18_mat2<>+0x38(SB)/8, $0x0202020202020202
   843
   // expandAVX512_18_inShuf3: byte indices loaded into Z4 by expandAVX512_18
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   // The upper four lanes are all 0xff; the matching lanes of
   // expandAVX512_18_mat3 are zero matrices.
   844GLOBL expandAVX512_18_inShuf3<>(SB), RODATA, $0x40
   845DATA  expandAVX512_18_inShuf3<>+0x00(SB)/8, $0xffffffffff060504
   846DATA  expandAVX512_18_inShuf3<>+0x08(SB)/8, $0xffffffffff060504
   847DATA  expandAVX512_18_inShuf3<>+0x10(SB)/8, $0xffffffffff060504
   848DATA  expandAVX512_18_inShuf3<>+0x18(SB)/8, $0xffff060605050404
   849DATA  expandAVX512_18_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
   850DATA  expandAVX512_18_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
   851DATA  expandAVX512_18_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
   852DATA  expandAVX512_18_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
   853
   // expandAVX512_18_mat3: memory operand of the fourth VGF2P8AFFINEQB in
   // expandAVX512_18 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf3-permuted input bytes. The upper four lanes are zero matrices, so
   // with the $0 constant those lanes produce zero bytes.
   854GLOBL expandAVX512_18_mat3<>(SB), RODATA, $0x40
   855DATA  expandAVX512_18_mat3<>+0x00(SB)/8, $0x0202020204040404
   856DATA  expandAVX512_18_mat3<>+0x08(SB)/8, $0x0404040404040404
   857DATA  expandAVX512_18_mat3<>+0x10(SB)/8, $0x0404040404040808
   858DATA  expandAVX512_18_mat3<>+0x18(SB)/8, $0x0808080808080808
   859DATA  expandAVX512_18_mat3<>+0x20(SB)/8, $0x0000000000000000
   860DATA  expandAVX512_18_mat3<>+0x28(SB)/8, $0x0000000000000000
   861DATA  expandAVX512_18_mat3<>+0x30(SB)/8, $0x0000000000000000
   862DATA  expandAVX512_18_mat3<>+0x38(SB)/8, $0x0000000000000000
   863
   // expandAVX512_18_outShufLo: byte indices for the VPERMI2B in
   // expandAVX512_18 that assembles Z1 from the first two transformed vectors.
   864GLOBL expandAVX512_18_outShufLo(SB), RODATA, $0x40
   865DATA  expandAVX512_18_outShufLo+0x00(SB)/8, $0x3028201810080100
   866DATA  expandAVX512_18_outShufLo+0x08(SB)/8, $0x6058504840393831
   867DATA  expandAVX512_18_outShufLo+0x10(SB)/8, $0x2119110903026968
   868DATA  expandAVX512_18_outShufLo+0x18(SB)/8, $0x5149413b3a333229
   869DATA  expandAVX512_18_outShufLo+0x20(SB)/8, $0x120a05046b6a6159
   870DATA  expandAVX512_18_outShufLo+0x28(SB)/8, $0x423d3c35342a221a
   871DATA  expandAVX512_18_outShufLo+0x30(SB)/8, $0x07066d6c625a524a
   872DATA  expandAVX512_18_outShufLo+0x38(SB)/8, $0x3e37362b231b130b
   873
   // expandAVX512_18_outShufHi0: byte indices for the zero-masked VPERMI2B.Z
   // in expandAVX512_18 that builds part of Z2 from the second and third
   // transformed vectors. The 0xff entries sit in byte lanes that the K1 mask
   // 0xffe0fff83ffe0fff clears.
   874GLOBL expandAVX512_18_outShufHi0(SB), RODATA, $0x40
   875DATA  expandAVX512_18_outShufHi0+0x00(SB)/8, $0x6160585048403830
   876DATA  expandAVX512_18_outShufHi0+0x08(SB)/8, $0xffffffff78706968
   877DATA  expandAVX512_18_outShufHi0+0x10(SB)/8, $0x59514941393231ff
   878DATA  expandAVX512_18_outShufHi0+0x18(SB)/8, $0xffff79716b6a6362
   879DATA  expandAVX512_18_outShufHi0+0x20(SB)/8, $0x4a423a3433ffffff
   880DATA  expandAVX512_18_outShufHi0+0x28(SB)/8, $0x7a726d6c65645a52
   881DATA  expandAVX512_18_outShufHi0+0x30(SB)/8, $0x3b3635ffffffffff
   882DATA  expandAVX512_18_outShufHi0+0x38(SB)/8, $0x6f6e67665b534b43
   883
   // expandAVX512_18_outShufHi1: byte indices for the zero-masked VPERMB.Z in
   // expandAVX512_18 that takes the remaining bytes of Z2 from the fourth
   // transformed vector. The non-0xff entries sit in the byte lanes selected
   // by the K1 mask 0x1f0007c001f000.
   884GLOBL expandAVX512_18_outShufHi1(SB), RODATA, $0x40
   885DATA  expandAVX512_18_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
   886DATA  expandAVX512_18_outShufHi1+0x08(SB)/8, $0x18100800ffffffff
   887DATA  expandAVX512_18_outShufHi1+0x10(SB)/8, $0xffffffffffffff19
   888DATA  expandAVX512_18_outShufHi1+0x18(SB)/8, $0x0901ffffffffffff
   889DATA  expandAVX512_18_outShufHi1+0x20(SB)/8, $0xffffffffff1b1a11
   890DATA  expandAVX512_18_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
   891DATA  expandAVX512_18_outShufHi1+0x30(SB)/8, $0xffffff1d1c120a02
   892DATA  expandAVX512_18_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
   893
   // expandAVX512_18 loads 64 bytes from (AX), derives four intermediate
   // vectors (VPERMB byte shuffle followed by a VGF2P8AFFINEQB bit-matrix
   // transform with constant 0) and combines them into Z1 and Z2.
   // Z2 is built in two zero-masked pieces under complementary K1 masks,
   // which are then merged with VPORQ.
   //
   // In:  AX = address of the 64-byte input block.
   // Out: Z1 and Z2 are the result registers as far as this body shows;
   //      presumably the low and high halves of the expanded result
   //      (confirm against the caller).
   // Overwrites Z0-Z7, K1 and AX (AX is reused for the mask constants after
   // the input has been loaded). NOSPLIT, zero-size frame, no arguments.
   894TEXT expandAVX512_18<>(SB), NOSPLIT, $0-0
   895	VMOVDQU64 expandAVX512_18_inShuf0<>(SB), Z0 // Z0, Z2, Z3, Z4 = input shuffle indices
   896	VMOVDQU64 expandAVX512_18_inShuf1<>(SB), Z2
   897	VMOVDQU64 expandAVX512_18_inShuf2<>(SB), Z3
   898	VMOVDQU64 expandAVX512_18_inShuf3<>(SB), Z4
   899	VMOVDQU64 expandAVX512_18_outShufLo(SB), Z1 // Z1, Z5, Z6 = output shuffle indices
   900	VMOVDQU64 expandAVX512_18_outShufHi0(SB), Z5
   901	VMOVDQU64 expandAVX512_18_outShufHi1(SB), Z6
   902	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes
   903	VPERMB Z7, Z0, Z0 // Z0 = input bytes selected by inShuf0
   904	VGF2P8AFFINEQB $0, expandAVX512_18_mat0<>(SB), Z0, Z0 // per-byte bit-matrix transform, constant 0
   905	VPERMB Z7, Z2, Z2 // Z2 = input bytes selected by inShuf1
   906	VGF2P8AFFINEQB $0, expandAVX512_18_mat1<>(SB), Z2, Z2
   907	VPERMB Z7, Z3, Z3 // Z3 = input bytes selected by inShuf2
   908	VGF2P8AFFINEQB $0, expandAVX512_18_mat2<>(SB), Z3, Z3
   909	VPERMB Z7, Z4, Z4 // Z4 = input bytes selected by inShuf3
   910	VGF2P8AFFINEQB $0, expandAVX512_18_mat3<>(SB), Z4, Z4
   911	VPERMI2B Z2, Z0, Z1 // Z1 = bytes gathered from the Z0/Z2 pair per outShufLo
   912	MOVQ $0xffe0fff83ffe0fff, AX // byte lanes taken from the Z2/Z3 pair
   913	KMOVQ AX, K1
   914	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = masked gather per outShufHi0; other lanes zeroed
   915	MOVQ $0x1f0007c001f000, AX // bitwise complement of the previous mask
   916	KMOVQ AX, K1
   917	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = masked bytes of Z4 per outShufHi1; other lanes zeroed
   918	VPORQ Z0, Z5, Z2 // Z2 = union of the two disjoint pieces
   919	RET
   920
   // expandAVX512_20_inShuf0: byte indices loaded into Z0 by expandAVX512_20
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   921GLOBL expandAVX512_20_inShuf0<>(SB), RODATA, $0x40
   922DATA  expandAVX512_20_inShuf0<>+0x00(SB)/8, $0x0303020201010000
   923DATA  expandAVX512_20_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
   924DATA  expandAVX512_20_inShuf0<>+0x10(SB)/8, $0xff03020201010000
   925DATA  expandAVX512_20_inShuf0<>+0x18(SB)/8, $0xffff020201010000
   926DATA  expandAVX512_20_inShuf0<>+0x20(SB)/8, $0xffffffffff020100
   927DATA  expandAVX512_20_inShuf0<>+0x28(SB)/8, $0xffff020201010000
   928DATA  expandAVX512_20_inShuf0<>+0x30(SB)/8, $0xffff020201010000
   929DATA  expandAVX512_20_inShuf0<>+0x38(SB)/8, $0xffffffffff020100
   930
   // expandAVX512_20_mat0: memory operand of the first VGF2P8AFFINEQB in
   // expandAVX512_20 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf0-permuted input bytes.
   931GLOBL expandAVX512_20_mat0<>(SB), RODATA, $0x40
   932DATA  expandAVX512_20_mat0<>+0x00(SB)/8, $0x0101010101010101
   933DATA  expandAVX512_20_mat0<>+0x08(SB)/8, $0x0101010102020202
   934DATA  expandAVX512_20_mat0<>+0x10(SB)/8, $0x0202020202020202
   935DATA  expandAVX512_20_mat0<>+0x18(SB)/8, $0x0404040404040404
   936DATA  expandAVX512_20_mat0<>+0x20(SB)/8, $0x0404040408080808
   937DATA  expandAVX512_20_mat0<>+0x28(SB)/8, $0x0808080808080808
   938DATA  expandAVX512_20_mat0<>+0x30(SB)/8, $0x1010101010101010
   939DATA  expandAVX512_20_mat0<>+0x38(SB)/8, $0x1010101020202020
   940
   // expandAVX512_20_inShuf1: byte indices loaded into Z3 by expandAVX512_20
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   941GLOBL expandAVX512_20_inShuf1<>(SB), RODATA, $0x40
   942DATA  expandAVX512_20_inShuf1<>+0x00(SB)/8, $0xffff020201010000
   943DATA  expandAVX512_20_inShuf1<>+0x08(SB)/8, $0xffff020201010000
   944DATA  expandAVX512_20_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
   945DATA  expandAVX512_20_inShuf1<>+0x18(SB)/8, $0xffff020201010000
   946DATA  expandAVX512_20_inShuf1<>+0x20(SB)/8, $0xff06060505040403
   947DATA  expandAVX512_20_inShuf1<>+0x28(SB)/8, $0x0606050504040303
   948DATA  expandAVX512_20_inShuf1<>+0x30(SB)/8, $0xffffffff06050403
   949DATA  expandAVX512_20_inShuf1<>+0x38(SB)/8, $0xffff050504040303
   950
   // expandAVX512_20_mat1: memory operand of the second VGF2P8AFFINEQB in
   // expandAVX512_20 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf1-permuted input bytes.
   951GLOBL expandAVX512_20_mat1<>(SB), RODATA, $0x40
   952DATA  expandAVX512_20_mat1<>+0x00(SB)/8, $0x2020202020202020
   953DATA  expandAVX512_20_mat1<>+0x08(SB)/8, $0x4040404040404040
   954DATA  expandAVX512_20_mat1<>+0x10(SB)/8, $0x4040404080808080
   955DATA  expandAVX512_20_mat1<>+0x18(SB)/8, $0x8080808080808080
   956DATA  expandAVX512_20_mat1<>+0x20(SB)/8, $0x0202020202020202
   957DATA  expandAVX512_20_mat1<>+0x28(SB)/8, $0x0404040404040404
   958DATA  expandAVX512_20_mat1<>+0x30(SB)/8, $0x0404040408080808
   959DATA  expandAVX512_20_mat1<>+0x38(SB)/8, $0x0808080808080808
   960
   // expandAVX512_20_inShuf2: byte indices loaded into Z4 by expandAVX512_20
   // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
   961GLOBL expandAVX512_20_inShuf2<>(SB), RODATA, $0x40
   962DATA  expandAVX512_20_inShuf2<>+0x00(SB)/8, $0xffff050504040303
   963DATA  expandAVX512_20_inShuf2<>+0x08(SB)/8, $0xffffffffff050403
   964DATA  expandAVX512_20_inShuf2<>+0x10(SB)/8, $0xffff050504040303
   965DATA  expandAVX512_20_inShuf2<>+0x18(SB)/8, $0xffff050504040303
   966DATA  expandAVX512_20_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
   967DATA  expandAVX512_20_inShuf2<>+0x28(SB)/8, $0xffff050504040303
   968DATA  expandAVX512_20_inShuf2<>+0x30(SB)/8, $0xffff060605050404
   969DATA  expandAVX512_20_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
   970
   // expandAVX512_20_mat2: memory operand of the third VGF2P8AFFINEQB in
   // expandAVX512_20 (one 8x8 bit matrix per 64-bit lane), applied to the
   // inShuf2-permuted input bytes.
   971GLOBL expandAVX512_20_mat2<>(SB), RODATA, $0x40
   972DATA  expandAVX512_20_mat2<>+0x00(SB)/8, $0x1010101010101010
   973DATA  expandAVX512_20_mat2<>+0x08(SB)/8, $0x1010101020202020
   974DATA  expandAVX512_20_mat2<>+0x10(SB)/8, $0x2020202020202020
   975DATA  expandAVX512_20_mat2<>+0x18(SB)/8, $0x4040404040404040
   976DATA  expandAVX512_20_mat2<>+0x20(SB)/8, $0x4040404080808080
   977DATA  expandAVX512_20_mat2<>+0x28(SB)/8, $0x8080808080808080
   978DATA  expandAVX512_20_mat2<>+0x30(SB)/8, $0x0101010101010101
   979DATA  expandAVX512_20_mat2<>+0x38(SB)/8, $0x0101010102020202
   980
   // expandAVX512_20_outShufLo: byte indices for the VPERMI2B in
   // expandAVX512_20 that assembles Z1 from the first two transformed vectors.
   981GLOBL expandAVX512_20_outShufLo(SB), RODATA, $0x40
   982DATA  expandAVX512_20_outShufLo+0x00(SB)/8, $0x2019181110080100
   983DATA  expandAVX512_20_outShufLo+0x08(SB)/8, $0x4841403831302928
   984DATA  expandAVX512_20_outShufLo+0x10(SB)/8, $0x1209030259585049
   985DATA  expandAVX512_20_outShufLo+0x18(SB)/8, $0x33322b2a211b1a13
   986DATA  expandAVX512_20_outShufLo+0x20(SB)/8, $0x5b5a514b4a434239
   987DATA  expandAVX512_20_outShufLo+0x28(SB)/8, $0x221d1c15140a0504
   988DATA  expandAVX512_20_outShufLo+0x30(SB)/8, $0x4c45443a35342d2c
   989DATA  expandAVX512_20_outShufLo+0x38(SB)/8, $0x160b07065d5c524d
   990
   // expandAVX512_20_outShufHi: byte indices for the VPERMI2B in
   // expandAVX512_20 that assembles Z2 from the second and third transformed
   // vectors.
   991GLOBL expandAVX512_20_outShufHi(SB), RODATA, $0x40
   992DATA  expandAVX512_20_outShufHi+0x00(SB)/8, $0x4140393830292820
   993DATA  expandAVX512_20_outShufHi+0x08(SB)/8, $0x6968605958515048
   994DATA  expandAVX512_20_outShufHi+0x10(SB)/8, $0x312b2a2221787170
   995DATA  expandAVX512_20_outShufHi+0x18(SB)/8, $0x5a53524943423b3a
   996DATA  expandAVX512_20_outShufHi+0x20(SB)/8, $0x237973726b6a615b
   997DATA  expandAVX512_20_outShufHi+0x28(SB)/8, $0x45443d3c322d2c24
   998DATA  expandAVX512_20_outShufHi+0x30(SB)/8, $0x6d6c625d5c55544a
   999DATA  expandAVX512_20_outShufHi+0x38(SB)/8, $0x332f2e26257a7574
  1000
  // expandAVX512_20 loads 64 bytes from (AX), derives three intermediate
  // vectors (VPERMB byte shuffle followed by a VGF2P8AFFINEQB bit-matrix
  // transform with constant 0) and gathers them into Z1 and Z2.
  //
  // In:  AX = address of the 64-byte input block.
  // Out: Z1 and Z2 are the last registers written; presumably the low and
  //      high halves of the expanded result (confirm against the caller).
  // Overwrites Z0-Z5. Declared NOSPLIT with a zero-size frame and no
  // arguments ($0-0).
  1001TEXT expandAVX512_20<>(SB), NOSPLIT, $0-0
  1002	VMOVDQU64 expandAVX512_20_inShuf0<>(SB), Z0 // Z0, Z3, Z4 = input shuffle indices
  1003	VMOVDQU64 expandAVX512_20_inShuf1<>(SB), Z3
  1004	VMOVDQU64 expandAVX512_20_inShuf2<>(SB), Z4
  1005	VMOVDQU64 expandAVX512_20_outShufLo(SB), Z1 // Z1, Z2 = output shuffle indices
  1006	VMOVDQU64 expandAVX512_20_outShufHi(SB), Z2
  1007	VMOVDQU64 (AX), Z5 // Z5 = 64 input bytes
  1008	VPERMB Z5, Z0, Z0 // Z0 = input bytes selected by inShuf0
  1009	VGF2P8AFFINEQB $0, expandAVX512_20_mat0<>(SB), Z0, Z0 // per-byte bit-matrix transform, constant 0
  1010	VPERMB Z5, Z3, Z3 // Z3 = input bytes selected by inShuf1
  1011	VGF2P8AFFINEQB $0, expandAVX512_20_mat1<>(SB), Z3, Z3
  1012	VPERMB Z5, Z4, Z4 // Z4 = input bytes selected by inShuf2
  1013	VGF2P8AFFINEQB $0, expandAVX512_20_mat2<>(SB), Z4, Z4
  1014	VPERMI2B Z3, Z0, Z1 // Z1 = bytes gathered from the Z0/Z3 pair per outShufLo
  1015	VPERMI2B Z4, Z3, Z2 // Z2 = bytes gathered from the Z3/Z4 pair per outShufHi
  1016	RET
  1017
  // expandAVX512_22_inShuf0: byte indices loaded into Z0 by expandAVX512_22
  // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
  1018GLOBL expandAVX512_22_inShuf0<>(SB), RODATA, $0x40
  1019DATA  expandAVX512_22_inShuf0<>+0x00(SB)/8, $0xffff020201010000
  1020DATA  expandAVX512_22_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
  1021DATA  expandAVX512_22_inShuf0<>+0x10(SB)/8, $0xffff020201010000
  1022DATA  expandAVX512_22_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
  1023DATA  expandAVX512_22_inShuf0<>+0x20(SB)/8, $0xffff020201010000
  1024DATA  expandAVX512_22_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
  1025DATA  expandAVX512_22_inShuf0<>+0x30(SB)/8, $0xffff020201010000
  1026DATA  expandAVX512_22_inShuf0<>+0x38(SB)/8, $0xffff020201010000
  1027
  // expandAVX512_22_mat0: memory operand of the first VGF2P8AFFINEQB in
  // expandAVX512_22 (one 8x8 bit matrix per 64-bit lane), applied to the
  // inShuf0-permuted input bytes.
  1028GLOBL expandAVX512_22_mat0<>(SB), RODATA, $0x40
  1029DATA  expandAVX512_22_mat0<>+0x00(SB)/8, $0x0101010101010101
  1030DATA  expandAVX512_22_mat0<>+0x08(SB)/8, $0x0101010101010202
  1031DATA  expandAVX512_22_mat0<>+0x10(SB)/8, $0x0202020202020202
  1032DATA  expandAVX512_22_mat0<>+0x18(SB)/8, $0x0202020204040404
  1033DATA  expandAVX512_22_mat0<>+0x20(SB)/8, $0x0404040404040404
  1034DATA  expandAVX512_22_mat0<>+0x28(SB)/8, $0x0404080808080808
  1035DATA  expandAVX512_22_mat0<>+0x30(SB)/8, $0x0808080808080808
  1036DATA  expandAVX512_22_mat0<>+0x38(SB)/8, $0x1010101010101010
  1037
  // expandAVX512_22_inShuf1: byte indices loaded into Z2 by expandAVX512_22
  // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
  1038GLOBL expandAVX512_22_inShuf1<>(SB), RODATA, $0x40
  1039DATA  expandAVX512_22_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
  1040DATA  expandAVX512_22_inShuf1<>+0x08(SB)/8, $0xffff020201010000
  1041DATA  expandAVX512_22_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
  1042DATA  expandAVX512_22_inShuf1<>+0x18(SB)/8, $0xffff020201010000
  1043DATA  expandAVX512_22_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
  1044DATA  expandAVX512_22_inShuf1<>+0x28(SB)/8, $0xffffffff01010000
  1045DATA  expandAVX512_22_inShuf1<>+0x30(SB)/8, $0xffff040403030202
  1046DATA  expandAVX512_22_inShuf1<>+0x38(SB)/8, $0xffff050504040303
  1047
  // expandAVX512_22_mat1: memory operand of the second VGF2P8AFFINEQB in
  // expandAVX512_22 (one 8x8 bit matrix per 64-bit lane), applied to the
  // inShuf1-permuted input bytes.
  1048GLOBL expandAVX512_22_mat1<>(SB), RODATA, $0x40
  1049DATA  expandAVX512_22_mat1<>+0x00(SB)/8, $0x1010101010102020
  1050DATA  expandAVX512_22_mat1<>+0x08(SB)/8, $0x2020202020202020
  1051DATA  expandAVX512_22_mat1<>+0x10(SB)/8, $0x2020202040404040
  1052DATA  expandAVX512_22_mat1<>+0x18(SB)/8, $0x4040404040404040
  1053DATA  expandAVX512_22_mat1<>+0x20(SB)/8, $0x4040808080808080
  1054DATA  expandAVX512_22_mat1<>+0x28(SB)/8, $0x8080808080808080
  1055DATA  expandAVX512_22_mat1<>+0x30(SB)/8, $0x8080808080808080
  1056DATA  expandAVX512_22_mat1<>+0x38(SB)/8, $0x0101010101010101
  1057
  // expandAVX512_22_inShuf2: byte indices loaded into Z3 by expandAVX512_22
  // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
  1058GLOBL expandAVX512_22_inShuf2<>(SB), RODATA, $0x40
  1059DATA  expandAVX512_22_inShuf2<>+0x00(SB)/8, $0xffffffffff050403
  1060DATA  expandAVX512_22_inShuf2<>+0x08(SB)/8, $0xffff050504040303
  1061DATA  expandAVX512_22_inShuf2<>+0x10(SB)/8, $0xffffffffff050403
  1062DATA  expandAVX512_22_inShuf2<>+0x18(SB)/8, $0xffff050504040303
  1063DATA  expandAVX512_22_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
  1064DATA  expandAVX512_22_inShuf2<>+0x28(SB)/8, $0xffff050504040303
  1065DATA  expandAVX512_22_inShuf2<>+0x30(SB)/8, $0xffff050504040303
  1066DATA  expandAVX512_22_inShuf2<>+0x38(SB)/8, $0xffffffffff050403
  1067
  // expandAVX512_22_mat2: memory operand of the third VGF2P8AFFINEQB in
  // expandAVX512_22 (one 8x8 bit matrix per 64-bit lane), applied to the
  // inShuf2-permuted input bytes.
  1068GLOBL expandAVX512_22_mat2<>(SB), RODATA, $0x40
  1069DATA  expandAVX512_22_mat2<>+0x00(SB)/8, $0x0101010101010202
  1070DATA  expandAVX512_22_mat2<>+0x08(SB)/8, $0x0202020202020202
  1071DATA  expandAVX512_22_mat2<>+0x10(SB)/8, $0x0202020204040404
  1072DATA  expandAVX512_22_mat2<>+0x18(SB)/8, $0x0404040404040404
  1073DATA  expandAVX512_22_mat2<>+0x20(SB)/8, $0x0404080808080808
  1074DATA  expandAVX512_22_mat2<>+0x28(SB)/8, $0x0808080808080808
  1075DATA  expandAVX512_22_mat2<>+0x30(SB)/8, $0x1010101010101010
  1076DATA  expandAVX512_22_mat2<>+0x38(SB)/8, $0x1010101010102020
  1077
  // expandAVX512_22_inShuf3: byte indices loaded into Z4 by expandAVX512_22
  // and used by VPERMB to pick bytes from the 64-byte input read from (AX).
  // The upper four lanes are all 0xff; the matching lanes of
  // expandAVX512_22_mat3 are zero matrices.
  1078GLOBL expandAVX512_22_inShuf3<>(SB), RODATA, $0x40
  1079DATA  expandAVX512_22_inShuf3<>+0x00(SB)/8, $0xffff050504040303
  1080DATA  expandAVX512_22_inShuf3<>+0x08(SB)/8, $0xffffffffff050403
  1081DATA  expandAVX512_22_inShuf3<>+0x10(SB)/8, $0xffffff0504040303
  1082DATA  expandAVX512_22_inShuf3<>+0x18(SB)/8, $0xffffffffffff0403
  1083DATA  expandAVX512_22_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  1084DATA  expandAVX512_22_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  1085DATA  expandAVX512_22_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1086DATA  expandAVX512_22_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1087
  // expandAVX512_22_mat3: memory operand of the fourth VGF2P8AFFINEQB in
  // expandAVX512_22 (one 8x8 bit matrix per 64-bit lane), applied to the
  // inShuf3-permuted input bytes. The upper four lanes are zero matrices, so
  // with the $0 constant those lanes produce zero bytes.
  1088GLOBL expandAVX512_22_mat3<>(SB), RODATA, $0x40
  1089DATA  expandAVX512_22_mat3<>+0x00(SB)/8, $0x2020202020202020
  1090DATA  expandAVX512_22_mat3<>+0x08(SB)/8, $0x2020202040404040
  1091DATA  expandAVX512_22_mat3<>+0x10(SB)/8, $0x4040404040404040
  1092DATA  expandAVX512_22_mat3<>+0x18(SB)/8, $0x4040808080808080
  1093DATA  expandAVX512_22_mat3<>+0x20(SB)/8, $0x0000000000000000
  1094DATA  expandAVX512_22_mat3<>+0x28(SB)/8, $0x0000000000000000
  1095DATA  expandAVX512_22_mat3<>+0x30(SB)/8, $0x0000000000000000
  1096DATA  expandAVX512_22_mat3<>+0x38(SB)/8, $0x0000000000000000
  1097
  // expandAVX512_22_outShufLo: byte indices for the VPERMI2B in
  // expandAVX512_22 that assembles Z1 from the first two transformed vectors.
  1098GLOBL expandAVX512_22_outShufLo(SB), RODATA, $0x40
  1099DATA  expandAVX512_22_outShufLo+0x00(SB)/8, $0x2120181110080100
  1100DATA  expandAVX512_22_outShufLo+0x08(SB)/8, $0x4948403938313028
  1101DATA  expandAVX512_22_outShufLo+0x10(SB)/8, $0x0302696860595850
  1102DATA  expandAVX512_22_outShufLo+0x18(SB)/8, $0x3229232219131209
  1103DATA  expandAVX512_22_outShufLo+0x20(SB)/8, $0x5a514b4a413b3a33
  1104DATA  expandAVX512_22_outShufLo+0x28(SB)/8, $0x140a05046b6a615b
  1105DATA  expandAVX512_22_outShufLo+0x30(SB)/8, $0x3c35342a25241a15
  1106DATA  expandAVX512_22_outShufLo+0x38(SB)/8, $0x625d5c524d4c423d
  1107
  // expandAVX512_22_outShufHi0: byte indices for the zero-masked VPERMI2B.Z
  // in expandAVX512_22 that builds part of Z2 from the second and third
  // transformed vectors. The 0xff entries sit in byte lanes that the K1 mask
  // 0xffff03fffc0ffff clears.
  1108GLOBL expandAVX512_22_outShufHi0(SB), RODATA, $0x40
  1109DATA  expandAVX512_22_outShufHi0+0x00(SB)/8, $0x5049484039383130
  1110DATA  expandAVX512_22_outShufHi0+0x08(SB)/8, $0x7871706968605958
  1111DATA  expandAVX512_22_outShufHi0+0x10(SB)/8, $0x3332ffffffffffff
  1112DATA  expandAVX512_22_outShufHi0+0x18(SB)/8, $0x5b5a514b4a413b3a
  1113DATA  expandAVX512_22_outShufHi0+0x20(SB)/8, $0xffff7973726b6a61
  1114DATA  expandAVX512_22_outShufHi0+0x28(SB)/8, $0x3d3c3534ffffffff
  1115DATA  expandAVX512_22_outShufHi0+0x30(SB)/8, $0x6c625d5c524d4c42
  1116DATA  expandAVX512_22_outShufHi0+0x38(SB)/8, $0xffffffff7a75746d
  1117
  // expandAVX512_22_outShufHi1: byte indices for the zero-masked VPERMB.Z in
  // expandAVX512_22 that takes the remaining bytes of Z2 from the fourth
  // transformed vector. The non-0xff entries sit in the byte lanes selected
  // by the K1 mask 0xf0000fc0003f0000.
  1118GLOBL expandAVX512_22_outShufHi1(SB), RODATA, $0x40
  1119DATA  expandAVX512_22_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1120DATA  expandAVX512_22_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1121DATA  expandAVX512_22_outShufHi1+0x10(SB)/8, $0xffff181110080100
  1122DATA  expandAVX512_22_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  1123DATA  expandAVX512_22_outShufHi1+0x20(SB)/8, $0x0302ffffffffffff
  1124DATA  expandAVX512_22_outShufHi1+0x28(SB)/8, $0xffffffff19131209
  1125DATA  expandAVX512_22_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
  1126DATA  expandAVX512_22_outShufHi1+0x38(SB)/8, $0x140a0504ffffffff
  1127
  // expandAVX512_22 loads 64 bytes from (AX), derives four intermediate
  // vectors (VPERMB byte shuffle followed by a VGF2P8AFFINEQB bit-matrix
  // transform with constant 0) and combines them into Z1 and Z2.
  // Z2 is built in two zero-masked pieces under complementary K1 masks,
  // which are then merged with VPORQ.
  //
  // In:  AX = address of the 64-byte input block.
  // Out: Z1 and Z2 are the result registers as far as this body shows;
  //      presumably the low and high halves of the expanded result
  //      (confirm against the caller).
  // Overwrites Z0-Z7, K1 and AX (AX is reused for the mask constants after
  // the input has been loaded). NOSPLIT, zero-size frame, no arguments.
  1128TEXT expandAVX512_22<>(SB), NOSPLIT, $0-0
  1129	VMOVDQU64 expandAVX512_22_inShuf0<>(SB), Z0 // Z0, Z2, Z3, Z4 = input shuffle indices
  1130	VMOVDQU64 expandAVX512_22_inShuf1<>(SB), Z2
  1131	VMOVDQU64 expandAVX512_22_inShuf2<>(SB), Z3
  1132	VMOVDQU64 expandAVX512_22_inShuf3<>(SB), Z4
  1133	VMOVDQU64 expandAVX512_22_outShufLo(SB), Z1 // Z1, Z5, Z6 = output shuffle indices
  1134	VMOVDQU64 expandAVX512_22_outShufHi0(SB), Z5
  1135	VMOVDQU64 expandAVX512_22_outShufHi1(SB), Z6
  1136	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes
  1137	VPERMB Z7, Z0, Z0 // Z0 = input bytes selected by inShuf0
  1138	VGF2P8AFFINEQB $0, expandAVX512_22_mat0<>(SB), Z0, Z0 // per-byte bit-matrix transform, constant 0
  1139	VPERMB Z7, Z2, Z2 // Z2 = input bytes selected by inShuf1
  1140	VGF2P8AFFINEQB $0, expandAVX512_22_mat1<>(SB), Z2, Z2
  1141	VPERMB Z7, Z3, Z3 // Z3 = input bytes selected by inShuf2
  1142	VGF2P8AFFINEQB $0, expandAVX512_22_mat2<>(SB), Z3, Z3
  1143	VPERMB Z7, Z4, Z4 // Z4 = input bytes selected by inShuf3
  1144	VGF2P8AFFINEQB $0, expandAVX512_22_mat3<>(SB), Z4, Z4
  1145	VPERMI2B Z2, Z0, Z1 // Z1 = bytes gathered from the Z0/Z2 pair per outShufLo
  1146	MOVQ $0xffff03fffc0ffff, AX // byte lanes taken from the Z2/Z3 pair
  1147	KMOVQ AX, K1
  1148	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = masked gather per outShufHi0; other lanes zeroed
  1149	MOVQ $0xf0000fc0003f0000, AX // bitwise complement of the previous mask
  1150	KMOVQ AX, K1
  1151	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = masked bytes of Z4 per outShufHi1; other lanes zeroed
  1152	VPORQ Z0, Z5, Z2 // Z2 = union of the two disjoint pieces
  1153	RET
  1154
  // expandAVX512_24_inShuf0: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_24 uses it as the byte-index operand of its first VPERMB over the input read from (AX).
  1155GLOBL expandAVX512_24_inShuf0<>(SB), RODATA, $0x40
  1156DATA  expandAVX512_24_inShuf0<>+0x00(SB)/8, $0x0202010101000000
  1157DATA  expandAVX512_24_inShuf0<>+0x08(SB)/8, $0x0202010101000000
  1158DATA  expandAVX512_24_inShuf0<>+0x10(SB)/8, $0x0202010101000000
  1159DATA  expandAVX512_24_inShuf0<>+0x18(SB)/8, $0x0202010101000000
  1160DATA  expandAVX512_24_inShuf0<>+0x20(SB)/8, $0x0202010101000000
  1161DATA  expandAVX512_24_inShuf0<>+0x28(SB)/8, $0xff02010101000000
  1162DATA  expandAVX512_24_inShuf0<>+0x30(SB)/8, $0xffff010101000000
  1163DATA  expandAVX512_24_inShuf0<>+0x38(SB)/8, $0xffff010101000000
  1164
  // expandAVX512_24_mat0: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // expandAVX512_24 loads it into Z2 and uses it as the VGF2P8AFFINEQB matrix for both the
  // inShuf0 and the inShuf1 stage (there is no separate mat1 table for this expander).
  1165GLOBL expandAVX512_24_mat0<>(SB), RODATA, $0x40
  1166DATA  expandAVX512_24_mat0<>+0x00(SB)/8, $0x0101010101010101
  1167DATA  expandAVX512_24_mat0<>+0x08(SB)/8, $0x0202020202020202
  1168DATA  expandAVX512_24_mat0<>+0x10(SB)/8, $0x0404040404040404
  1169DATA  expandAVX512_24_mat0<>+0x18(SB)/8, $0x0808080808080808
  1170DATA  expandAVX512_24_mat0<>+0x20(SB)/8, $0x1010101010101010
  1171DATA  expandAVX512_24_mat0<>+0x28(SB)/8, $0x2020202020202020
  1172DATA  expandAVX512_24_mat0<>+0x30(SB)/8, $0x4040404040404040
  1173DATA  expandAVX512_24_mat0<>+0x38(SB)/8, $0x8080808080808080
  1174
  // expandAVX512_24_inShuf1: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_24 uses it as the byte-index operand of its second VPERMB over the input read from (AX).
  1175GLOBL expandAVX512_24_inShuf1<>(SB), RODATA, $0x40
  1176DATA  expandAVX512_24_inShuf1<>+0x00(SB)/8, $0xffffffffffffff02
  1177DATA  expandAVX512_24_inShuf1<>+0x08(SB)/8, $0xffffffffffffff02
  1178DATA  expandAVX512_24_inShuf1<>+0x10(SB)/8, $0xffffffffffffff02
  1179DATA  expandAVX512_24_inShuf1<>+0x18(SB)/8, $0xffffffffffffff02
  1180DATA  expandAVX512_24_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
  1181DATA  expandAVX512_24_inShuf1<>+0x28(SB)/8, $0x0404040303030202
  1182DATA  expandAVX512_24_inShuf1<>+0x30(SB)/8, $0x0404030303020202
  1183DATA  expandAVX512_24_inShuf1<>+0x38(SB)/8, $0x0404030303020202
  1184
  // expandAVX512_24_inShuf2: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_24 uses it as the byte-index operand of its third VPERMB over the input read from (AX).
  1185GLOBL expandAVX512_24_inShuf2<>(SB), RODATA, $0x40
  1186DATA  expandAVX512_24_inShuf2<>+0x00(SB)/8, $0x0505040404030303
  1187DATA  expandAVX512_24_inShuf2<>+0x08(SB)/8, $0x0505040404030303
  1188DATA  expandAVX512_24_inShuf2<>+0x10(SB)/8, $0x0505040404030303
  1189DATA  expandAVX512_24_inShuf2<>+0x18(SB)/8, $0xffff040404030303
  1190DATA  expandAVX512_24_inShuf2<>+0x20(SB)/8, $0xffff040404030303
  1191DATA  expandAVX512_24_inShuf2<>+0x28(SB)/8, $0xffffffffffffff04
  1192DATA  expandAVX512_24_inShuf2<>+0x30(SB)/8, $0xffffffffffffff04
  1193DATA  expandAVX512_24_inShuf2<>+0x38(SB)/8, $0xffffffffffffff05
  1194
  // expandAVX512_24_mat2: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf2 permute in expandAVX512_24.
  1195GLOBL expandAVX512_24_mat2<>(SB), RODATA, $0x40
  1196DATA  expandAVX512_24_mat2<>+0x00(SB)/8, $0x0101010101010101
  1197DATA  expandAVX512_24_mat2<>+0x08(SB)/8, $0x0202020202020202
  1198DATA  expandAVX512_24_mat2<>+0x10(SB)/8, $0x0404040404040404
  1199DATA  expandAVX512_24_mat2<>+0x18(SB)/8, $0x0808080808080808
  1200DATA  expandAVX512_24_mat2<>+0x20(SB)/8, $0x1010101010101010
  1201DATA  expandAVX512_24_mat2<>+0x28(SB)/8, $0x4040404040404040
  1202DATA  expandAVX512_24_mat2<>+0x30(SB)/8, $0x8080808080808080
  1203DATA  expandAVX512_24_mat2<>+0x38(SB)/8, $0x0101010101010101
  1204
  // expandAVX512_24_inShuf3: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_24 uses it as the byte-index operand of its fourth VPERMB over the input read from (AX).
  1205GLOBL expandAVX512_24_inShuf3<>(SB), RODATA, $0x40
  1206DATA  expandAVX512_24_inShuf3<>+0x00(SB)/8, $0xffffffffffffff05
  1207DATA  expandAVX512_24_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
  1208DATA  expandAVX512_24_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
  1209DATA  expandAVX512_24_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
  1210DATA  expandAVX512_24_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  1211DATA  expandAVX512_24_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  1212DATA  expandAVX512_24_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1213DATA  expandAVX512_24_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1214
  // expandAVX512_24_mat3: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf3 permute in expandAVX512_24.
  // Only the first lane is non-zero.
  1215GLOBL expandAVX512_24_mat3<>(SB), RODATA, $0x40
  1216DATA  expandAVX512_24_mat3<>+0x00(SB)/8, $0x0202020202020202
  1217DATA  expandAVX512_24_mat3<>+0x08(SB)/8, $0x0000000000000000
  1218DATA  expandAVX512_24_mat3<>+0x10(SB)/8, $0x0000000000000000
  1219DATA  expandAVX512_24_mat3<>+0x18(SB)/8, $0x0000000000000000
  1220DATA  expandAVX512_24_mat3<>+0x20(SB)/8, $0x0000000000000000
  1221DATA  expandAVX512_24_mat3<>+0x28(SB)/8, $0x0000000000000000
  1222DATA  expandAVX512_24_mat3<>+0x30(SB)/8, $0x0000000000000000
  1223DATA  expandAVX512_24_mat3<>+0x38(SB)/8, $0x0000000000000000
  1224
  // expandAVX512_24_outShufLo: 64 byte indices for the unmasked VPERMI2B in expandAVX512_24,
  // which selects from the stage-0 (Z0) and stage-1 (Z2) results to form the Z1 output.
  1225GLOBL expandAVX512_24_outShufLo(SB), RODATA, $0x40
  1226DATA  expandAVX512_24_outShufLo+0x00(SB)/8, $0x11100a0908020100
  1227DATA  expandAVX512_24_outShufLo+0x08(SB)/8, $0x282221201a191812
  1228DATA  expandAVX512_24_outShufLo+0x10(SB)/8, $0x3a39383231302a29
  1229DATA  expandAVX512_24_outShufLo+0x18(SB)/8, $0x14130d0c0b050403
  1230DATA  expandAVX512_24_outShufLo+0x20(SB)/8, $0x2b2524231d1c1b15
  1231DATA  expandAVX512_24_outShufLo+0x28(SB)/8, $0x3d3c3b3534332d2c
  1232DATA  expandAVX512_24_outShufLo+0x30(SB)/8, $0x1716480f0e400706
  1233DATA  expandAVX512_24_outShufLo+0x38(SB)/8, $0x2e602726581f1e50
  1234
  // expandAVX512_24_outShufHi0: 64 byte indices used by expandAVX512_24 for its masked VPERMI2B.Z
  // over Z2/Z3. The single 0xff entry (byte 61) is the one position cleared in K1 mask 0xdfffffffffffffff.
  1235GLOBL expandAVX512_24_outShufHi0(SB), RODATA, $0x40
  1236DATA  expandAVX512_24_outShufHi0+0x00(SB)/8, $0x3a39383231302928
  1237DATA  expandAVX512_24_outShufHi0+0x08(SB)/8, $0x51504a4948424140
  1238DATA  expandAVX512_24_outShufHi0+0x10(SB)/8, $0x2a6261605a595852
  1239DATA  expandAVX512_24_outShufHi0+0x18(SB)/8, $0x3d3c3b3534332c2b
  1240DATA  expandAVX512_24_outShufHi0+0x20(SB)/8, $0x54534d4c4b454443
  1241DATA  expandAVX512_24_outShufHi0+0x28(SB)/8, $0x2d6564635d5c5b55
  1242DATA  expandAVX512_24_outShufHi0+0x30(SB)/8, $0x703f3e6837362f2e
  1243DATA  expandAVX512_24_outShufHi0+0x38(SB)/8, $0x5756ff4f4e784746
  1244
  // expandAVX512_24_outShufHi1: 64 byte indices used by expandAVX512_24 for its masked VPERMB.Z over Z4.
  // Only byte 61, the one position enabled by K1 mask 0x2000000000000000, holds a real index.
  1245GLOBL expandAVX512_24_outShufHi1(SB), RODATA, $0x40
  1246DATA  expandAVX512_24_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1247DATA  expandAVX512_24_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1248DATA  expandAVX512_24_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  1249DATA  expandAVX512_24_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  1250DATA  expandAVX512_24_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1251DATA  expandAVX512_24_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  1252DATA  expandAVX512_24_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
  1253DATA  expandAVX512_24_outShufHi1+0x38(SB)/8, $0xffff00ffffffffff
  1254
  // expandAVX512_24: frameless NOSPLIT routine ($0-0). Reads 64 bytes from (AX), runs four
  // VPERMB + VGF2P8AFFINEQB stages over that input, then merges the stage results into two
  // 64-byte vectors left in Z1 and Z2. Overwrites Z0-Z8, K1 and AX.
  // Unlike its neighbours, stages 0 and 1 share one matrix (mat0, held in Z2).
  // NOTE(review): the register contract (pointer in AX, results in Z1/Z2) is inferred from this
  // body only; confirm against the caller.
  // NOTE(review): the outShuf* symbols are referenced without the <> suffix used by the
  // inShuf*/mat* tables, so they are not file-local; confirm the generator intends that.
  1255TEXT expandAVX512_24<>(SB), NOSPLIT, $0-0
  1256	VMOVDQU64 expandAVX512_24_inShuf0<>(SB), Z0
  1257	VMOVDQU64 expandAVX512_24_mat0<>(SB), Z2
  1258	VMOVDQU64 expandAVX512_24_inShuf1<>(SB), Z3
  1259	VMOVDQU64 expandAVX512_24_inShuf2<>(SB), Z4
  1260	VMOVDQU64 expandAVX512_24_inShuf3<>(SB), Z5
  1261	VMOVDQU64 expandAVX512_24_outShufLo(SB), Z1
  1262	VMOVDQU64 expandAVX512_24_outShufHi0(SB), Z6
  1263	VMOVDQU64 expandAVX512_24_outShufHi1(SB), Z7
  1264	VMOVDQU64 (AX), Z8 // 64-byte input vector
  1265	VPERMB Z8, Z0, Z0 // stage 0: pick input bytes by inShuf0 ...
  1266	VGF2P8AFFINEQB $0, Z2, Z0, Z0 // ... then apply mat0 (in Z2)
  1267	VPERMB Z8, Z3, Z3 // stage 1: pick input bytes by inShuf1
  1268	VGF2P8AFFINEQB $0, Z2, Z3, Z2 // last use of mat0; Z2 now holds the stage-1 result
  1269	VPERMB Z8, Z4, Z3 // stage 2 (inShuf2 / mat2), result in Z3
  1270	VGF2P8AFFINEQB $0, expandAVX512_24_mat2<>(SB), Z3, Z3
  1271	VPERMB Z8, Z5, Z4 // stage 3 (inShuf3 / mat3), result in Z4
  1272	VGF2P8AFFINEQB $0, expandAVX512_24_mat3<>(SB), Z4, Z4
  1273	VPERMI2B Z2, Z0, Z1 // first result: Z1 = bytes of Z0/Z2 selected by outShufLo
  1274	MOVQ $0xdfffffffffffffff, AX // AX is reused as scratch; the input pointer is gone from here on
  1275	KMOVQ AX, K1
  1276	VPERMI2B.Z Z3, Z2, K1, Z6 // Z6 = bytes of Z2/Z3 selected by outShufHi0, zero where K1 is clear
  1277	MOVQ $0x2000000000000000, AX // bitwise complement of the previous mask
  1278	KMOVQ AX, K1
  1279	VPERMB.Z Z4, Z7, K1, Z0 // Z0 = bytes of Z4 selected by outShufHi1, zero where K1 is clear
  1280	VPORQ Z0, Z6, Z2 // second result: Z2 = Z6 | Z0 (the two masked halves are disjoint)
  1281	RET
  1282
  // expandAVX512_26_inShuf0: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_26 uses it as the byte-index operand of its first VPERMB over the input read from (AX).
  1283GLOBL expandAVX512_26_inShuf0<>(SB), RODATA, $0x40
  1284DATA  expandAVX512_26_inShuf0<>+0x00(SB)/8, $0x0202010101000000
  1285DATA  expandAVX512_26_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
  1286DATA  expandAVX512_26_inShuf0<>+0x10(SB)/8, $0xffff020201010000
  1287DATA  expandAVX512_26_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
  1288DATA  expandAVX512_26_inShuf0<>+0x20(SB)/8, $0xffff020201010000
  1289DATA  expandAVX512_26_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
  1290DATA  expandAVX512_26_inShuf0<>+0x30(SB)/8, $0x0202010101000000
  1291DATA  expandAVX512_26_inShuf0<>+0x38(SB)/8, $0xffff010101000000
  1292
  // expandAVX512_26_mat0: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf0 permute in expandAVX512_26.
  1293GLOBL expandAVX512_26_mat0<>(SB), RODATA, $0x40
  1294DATA  expandAVX512_26_mat0<>+0x00(SB)/8, $0x0101010101010101
  1295DATA  expandAVX512_26_mat0<>+0x08(SB)/8, $0x0101020202020202
  1296DATA  expandAVX512_26_mat0<>+0x10(SB)/8, $0x0202020202020202
  1297DATA  expandAVX512_26_mat0<>+0x18(SB)/8, $0x0202020204040404
  1298DATA  expandAVX512_26_mat0<>+0x20(SB)/8, $0x0404040404040404
  1299DATA  expandAVX512_26_mat0<>+0x28(SB)/8, $0x0404040404040808
  1300DATA  expandAVX512_26_mat0<>+0x30(SB)/8, $0x0808080808080808
  1301DATA  expandAVX512_26_mat0<>+0x38(SB)/8, $0x1010101010101010
  1302
  // expandAVX512_26_inShuf1: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_26 uses it as the byte-index operand of its second VPERMB over the input read from (AX).
  1303GLOBL expandAVX512_26_inShuf1<>(SB), RODATA, $0x40
  1304DATA  expandAVX512_26_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
  1305DATA  expandAVX512_26_inShuf1<>+0x08(SB)/8, $0xffffffff01010000
  1306DATA  expandAVX512_26_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
  1307DATA  expandAVX512_26_inShuf1<>+0x18(SB)/8, $0xffffffff01010000
  1308DATA  expandAVX512_26_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
  1309DATA  expandAVX512_26_inShuf1<>+0x28(SB)/8, $0xffff010101000000
  1310DATA  expandAVX512_26_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
  1311DATA  expandAVX512_26_inShuf1<>+0x38(SB)/8, $0xff04040403030302
  1312
  // expandAVX512_26_mat1: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf1 permute in expandAVX512_26.
  1313GLOBL expandAVX512_26_mat1<>(SB), RODATA, $0x40
  1314DATA  expandAVX512_26_mat1<>+0x00(SB)/8, $0x1010202020202020
  1315DATA  expandAVX512_26_mat1<>+0x08(SB)/8, $0x2020202020202020
  1316DATA  expandAVX512_26_mat1<>+0x10(SB)/8, $0x2020202040404040
  1317DATA  expandAVX512_26_mat1<>+0x18(SB)/8, $0x4040404040404040
  1318DATA  expandAVX512_26_mat1<>+0x20(SB)/8, $0x4040404040408080
  1319DATA  expandAVX512_26_mat1<>+0x28(SB)/8, $0x8080808080808080
  1320DATA  expandAVX512_26_mat1<>+0x30(SB)/8, $0x0101010101010101
  1321DATA  expandAVX512_26_mat1<>+0x38(SB)/8, $0x0808080808080808
  1322
  // expandAVX512_26_inShuf2: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_26 uses it as the byte-index operand of its third VPERMB over the input read from (AX).
  1323GLOBL expandAVX512_26_inShuf2<>(SB), RODATA, $0x40
  1324DATA  expandAVX512_26_inShuf2<>+0x00(SB)/8, $0x0404030303020202
  1325DATA  expandAVX512_26_inShuf2<>+0x08(SB)/8, $0xffffffffff040302
  1326DATA  expandAVX512_26_inShuf2<>+0x10(SB)/8, $0xffff040403030202
  1327DATA  expandAVX512_26_inShuf2<>+0x18(SB)/8, $0xffffffffff040302
  1328DATA  expandAVX512_26_inShuf2<>+0x20(SB)/8, $0xffff040403030202
  1329DATA  expandAVX512_26_inShuf2<>+0x28(SB)/8, $0xffffffffff040302
  1330DATA  expandAVX512_26_inShuf2<>+0x30(SB)/8, $0xff04030303020202
  1331DATA  expandAVX512_26_inShuf2<>+0x38(SB)/8, $0xffff040404030303
  1332
  // expandAVX512_26_mat2: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf2 permute in expandAVX512_26.
  1333GLOBL expandAVX512_26_mat2<>(SB), RODATA, $0x40
  1334DATA  expandAVX512_26_mat2<>+0x00(SB)/8, $0x1010101010101010
  1335DATA  expandAVX512_26_mat2<>+0x08(SB)/8, $0x1010202020202020
  1336DATA  expandAVX512_26_mat2<>+0x10(SB)/8, $0x2020202020202020
  1337DATA  expandAVX512_26_mat2<>+0x18(SB)/8, $0x2020202040404040
  1338DATA  expandAVX512_26_mat2<>+0x20(SB)/8, $0x4040404040404040
  1339DATA  expandAVX512_26_mat2<>+0x28(SB)/8, $0x4040404040408080
  1340DATA  expandAVX512_26_mat2<>+0x30(SB)/8, $0x8080808080808080
  1341DATA  expandAVX512_26_mat2<>+0x38(SB)/8, $0x0101010101010101
  1342
  // expandAVX512_26_inShuf3: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_26 uses it as the byte-index operand of its fourth VPERMB over the input read from (AX).
  1343GLOBL expandAVX512_26_inShuf3<>(SB), RODATA, $0x40
  1344DATA  expandAVX512_26_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
  1345DATA  expandAVX512_26_inShuf3<>+0x08(SB)/8, $0xffffffff04040303
  1346DATA  expandAVX512_26_inShuf3<>+0x10(SB)/8, $0xffffffffffff0403
  1347DATA  expandAVX512_26_inShuf3<>+0x18(SB)/8, $0xffffffff04040303
  1348DATA  expandAVX512_26_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
  1349DATA  expandAVX512_26_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
  1350DATA  expandAVX512_26_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1351DATA  expandAVX512_26_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1352
  // expandAVX512_26_mat3: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf3 permute in expandAVX512_26.
  // The last two lanes are all-zero matrices.
  1353GLOBL expandAVX512_26_mat3<>(SB), RODATA, $0x40
  1354DATA  expandAVX512_26_mat3<>+0x00(SB)/8, $0x0101020202020202
  1355DATA  expandAVX512_26_mat3<>+0x08(SB)/8, $0x0202020202020202
  1356DATA  expandAVX512_26_mat3<>+0x10(SB)/8, $0x0202020204040404
  1357DATA  expandAVX512_26_mat3<>+0x18(SB)/8, $0x0404040404040404
  1358DATA  expandAVX512_26_mat3<>+0x20(SB)/8, $0x0404040404040808
  1359DATA  expandAVX512_26_mat3<>+0x28(SB)/8, $0x1010101010101010
  1360DATA  expandAVX512_26_mat3<>+0x30(SB)/8, $0x0000000000000000
  1361DATA  expandAVX512_26_mat3<>+0x38(SB)/8, $0x0000000000000000
  1362
  // expandAVX512_26_outShufLo: 64 byte indices for the unmasked VPERMI2B in expandAVX512_26,
  // which selects from the stage-0 (Z0) and stage-1 (Z2) results to form the Z1 output.
  1363GLOBL expandAVX512_26_outShufLo(SB), RODATA, $0x40
  1364DATA  expandAVX512_26_outShufLo+0x00(SB)/8, $0x2018111008020100
  1365DATA  expandAVX512_26_outShufLo+0x08(SB)/8, $0x3a39383231302821
  1366DATA  expandAVX512_26_outShufLo+0x10(SB)/8, $0x6860595850494840
  1367DATA  expandAVX512_26_outShufLo+0x18(SB)/8, $0x1312090504036a69
  1368DATA  expandAVX512_26_outShufLo+0x20(SB)/8, $0x3b35343329232219
  1369DATA  expandAVX512_26_outShufLo+0x28(SB)/8, $0x5b5a514b4a413d3c
  1370DATA  expandAVX512_26_outShufLo+0x30(SB)/8, $0x0a7007066d6c6b61
  1371DATA  expandAVX512_26_outShufLo+0x38(SB)/8, $0x37362a25241a1514
  1372
  // expandAVX512_26_outShufHi0: 64 byte indices used by expandAVX512_26 for its masked VPERMI2B.Z
  // over Z2/Z3. The 0xff entries sit exactly at the byte positions cleared in K1 mask 0xff7c07ffff01ffff.
  1373GLOBL expandAVX512_26_outShufHi0(SB), RODATA, $0x40
  1374DATA  expandAVX512_26_outShufHi0+0x00(SB)/8, $0x5851504842414038
  1375DATA  expandAVX512_26_outShufHi0+0x08(SB)/8, $0x7978727170686160
  1376DATA  expandAVX512_26_outShufHi0+0x10(SB)/8, $0xffffffffffffff7a
  1377DATA  expandAVX512_26_outShufHi0+0x18(SB)/8, $0x52494544433b3a39
  1378DATA  expandAVX512_26_outShufHi0+0x20(SB)/8, $0x7574736963625953
  1379DATA  expandAVX512_26_outShufHi0+0x28(SB)/8, $0xffffffffff7d7c7b
  1380DATA  expandAVX512_26_outShufHi0+0x30(SB)/8, $0xff47463e3d3cffff
  1381DATA  expandAVX512_26_outShufHi0+0x38(SB)/8, $0x766a65645a55544a
  1382
  // expandAVX512_26_outShufHi1: 64 byte indices used by expandAVX512_26 for its masked VPERMB.Z over Z4.
  // Only the positions enabled by K1 mask 0x83f80000fe0000 hold real indices; all others are 0xff.
  1383GLOBL expandAVX512_26_outShufHi1(SB), RODATA, $0x40
  1384DATA  expandAVX512_26_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1385DATA  expandAVX512_26_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1386DATA  expandAVX512_26_outShufHi1+0x10(SB)/8, $0x20191810090800ff
  1387DATA  expandAVX512_26_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  1388DATA  expandAVX512_26_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1389DATA  expandAVX512_26_outShufHi1+0x28(SB)/8, $0x1a110b0a01ffffff
  1390DATA  expandAVX512_26_outShufHi1+0x30(SB)/8, $0x28ffffffffff211b
  1391DATA  expandAVX512_26_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
  1392
  // expandAVX512_26: frameless NOSPLIT routine ($0-0). Reads 64 bytes from (AX), runs four
  // VPERMB + VGF2P8AFFINEQB stages over that input, then merges the stage results into two
  // 64-byte vectors left in Z1 and Z2. Overwrites Z0-Z7, K1 and AX.
  // NOTE(review): the register contract (pointer in AX, results in Z1/Z2) is inferred from this
  // body only; confirm against the caller.
  // NOTE(review): the outShuf* symbols are referenced without the <> suffix used by the
  // inShuf*/mat* tables, so they are not file-local; confirm the generator intends that.
  1393TEXT expandAVX512_26<>(SB), NOSPLIT, $0-0
  1394	VMOVDQU64 expandAVX512_26_inShuf0<>(SB), Z0
  1395	VMOVDQU64 expandAVX512_26_inShuf1<>(SB), Z2
  1396	VMOVDQU64 expandAVX512_26_inShuf2<>(SB), Z3
  1397	VMOVDQU64 expandAVX512_26_inShuf3<>(SB), Z4
  1398	VMOVDQU64 expandAVX512_26_outShufLo(SB), Z1
  1399	VMOVDQU64 expandAVX512_26_outShufHi0(SB), Z5
  1400	VMOVDQU64 expandAVX512_26_outShufHi1(SB), Z6
  1401	VMOVDQU64 (AX), Z7 // 64-byte input vector
  1402	VPERMB Z7, Z0, Z0 // stage 0: pick input bytes by inShuf0 ...
  1403	VGF2P8AFFINEQB $0, expandAVX512_26_mat0<>(SB), Z0, Z0 // ... then apply the per-lane bit matrix mat0
  1404	VPERMB Z7, Z2, Z2 // stage 1 (inShuf1 / mat1)
  1405	VGF2P8AFFINEQB $0, expandAVX512_26_mat1<>(SB), Z2, Z2
  1406	VPERMB Z7, Z3, Z3 // stage 2 (inShuf2 / mat2)
  1407	VGF2P8AFFINEQB $0, expandAVX512_26_mat2<>(SB), Z3, Z3
  1408	VPERMB Z7, Z4, Z4 // stage 3 (inShuf3 / mat3)
  1409	VGF2P8AFFINEQB $0, expandAVX512_26_mat3<>(SB), Z4, Z4
  1410	VPERMI2B Z2, Z0, Z1 // first result: Z1 = bytes of Z0/Z2 selected by outShufLo
  1411	MOVQ $0xff7c07ffff01ffff, AX // AX is reused as scratch; the input pointer is gone from here on
  1412	KMOVQ AX, K1
  1413	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes of Z2/Z3 selected by outShufHi0, zero where K1 is clear
  1414	MOVQ $0x83f80000fe0000, AX // bitwise complement of the previous mask
  1415	KMOVQ AX, K1
  1416	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes of Z4 selected by outShufHi1, zero where K1 is clear
  1417	VPORQ Z0, Z5, Z2 // second result: Z2 = Z5 | Z0 (the two masked halves are disjoint)
  1418	RET
  1419
  // expandAVX512_28_inShuf0: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_28 uses it as the byte-index operand of its first VPERMB over the input read from (AX).
  1420GLOBL expandAVX512_28_inShuf0<>(SB), RODATA, $0x40
  1421DATA  expandAVX512_28_inShuf0<>+0x00(SB)/8, $0x0202010101000000
  1422DATA  expandAVX512_28_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
  1423DATA  expandAVX512_28_inShuf0<>+0x10(SB)/8, $0x0202010101000000
  1424DATA  expandAVX512_28_inShuf0<>+0x18(SB)/8, $0xff02010101000000
  1425DATA  expandAVX512_28_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
  1426DATA  expandAVX512_28_inShuf0<>+0x28(SB)/8, $0xffff010101000000
  1427DATA  expandAVX512_28_inShuf0<>+0x30(SB)/8, $0xffff010101000000
  1428DATA  expandAVX512_28_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
  1429
  // expandAVX512_28_mat0: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf0 permute in expandAVX512_28.
  1430GLOBL expandAVX512_28_mat0<>(SB), RODATA, $0x40
  1431DATA  expandAVX512_28_mat0<>+0x00(SB)/8, $0x0101010101010101
  1432DATA  expandAVX512_28_mat0<>+0x08(SB)/8, $0x0101010102020202
  1433DATA  expandAVX512_28_mat0<>+0x10(SB)/8, $0x0202020202020202
  1434DATA  expandAVX512_28_mat0<>+0x18(SB)/8, $0x0404040404040404
  1435DATA  expandAVX512_28_mat0<>+0x20(SB)/8, $0x0404040408080808
  1436DATA  expandAVX512_28_mat0<>+0x28(SB)/8, $0x0808080808080808
  1437DATA  expandAVX512_28_mat0<>+0x30(SB)/8, $0x1010101010101010
  1438DATA  expandAVX512_28_mat0<>+0x38(SB)/8, $0x1010101020202020
  1439
  // expandAVX512_28_inShuf1: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_28 uses it as the byte-index operand of its second VPERMB over the input read from (AX).
  1440GLOBL expandAVX512_28_inShuf1<>(SB), RODATA, $0x40
  1441DATA  expandAVX512_28_inShuf1<>+0x00(SB)/8, $0xffff010101000000
  1442DATA  expandAVX512_28_inShuf1<>+0x08(SB)/8, $0xffff010101000000
  1443DATA  expandAVX512_28_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
  1444DATA  expandAVX512_28_inShuf1<>+0x18(SB)/8, $0xffff010101000000
  1445DATA  expandAVX512_28_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
  1446DATA  expandAVX512_28_inShuf1<>+0x28(SB)/8, $0xffffffffffffff02
  1447DATA  expandAVX512_28_inShuf1<>+0x30(SB)/8, $0x0404040303030202
  1448DATA  expandAVX512_28_inShuf1<>+0x38(SB)/8, $0xffffffffff040302
  1449
  // expandAVX512_28_mat1: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf1 permute in expandAVX512_28.
  1450GLOBL expandAVX512_28_mat1<>(SB), RODATA, $0x40
  1451DATA  expandAVX512_28_mat1<>+0x00(SB)/8, $0x2020202020202020
  1452DATA  expandAVX512_28_mat1<>+0x08(SB)/8, $0x4040404040404040
  1453DATA  expandAVX512_28_mat1<>+0x10(SB)/8, $0x4040404080808080
  1454DATA  expandAVX512_28_mat1<>+0x18(SB)/8, $0x8080808080808080
  1455DATA  expandAVX512_28_mat1<>+0x20(SB)/8, $0x0101010101010101
  1456DATA  expandAVX512_28_mat1<>+0x28(SB)/8, $0x0202020202020202
  1457DATA  expandAVX512_28_mat1<>+0x30(SB)/8, $0x0404040404040404
  1458DATA  expandAVX512_28_mat1<>+0x38(SB)/8, $0x0404040408080808
  1459
  // expandAVX512_28_inShuf2: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_28 uses it as the byte-index operand of its third VPERMB over the input read from (AX).
  1460GLOBL expandAVX512_28_inShuf2<>(SB), RODATA, $0x40
  1461DATA  expandAVX512_28_inShuf2<>+0x00(SB)/8, $0x0404030303020202
  1462DATA  expandAVX512_28_inShuf2<>+0x08(SB)/8, $0x0404030303020202
  1463DATA  expandAVX512_28_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
  1464DATA  expandAVX512_28_inShuf2<>+0x18(SB)/8, $0xffff030303020202
  1465DATA  expandAVX512_28_inShuf2<>+0x20(SB)/8, $0xffff030303020202
  1466DATA  expandAVX512_28_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
  1467DATA  expandAVX512_28_inShuf2<>+0x30(SB)/8, $0xffff030303020202
  1468DATA  expandAVX512_28_inShuf2<>+0x38(SB)/8, $0xffff040404030303
  1469
  // expandAVX512_28_mat2: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf2 permute in expandAVX512_28.
  1470GLOBL expandAVX512_28_mat2<>(SB), RODATA, $0x40
  1471DATA  expandAVX512_28_mat2<>+0x00(SB)/8, $0x0808080808080808
  1472DATA  expandAVX512_28_mat2<>+0x08(SB)/8, $0x1010101010101010
  1473DATA  expandAVX512_28_mat2<>+0x10(SB)/8, $0x1010101020202020
  1474DATA  expandAVX512_28_mat2<>+0x18(SB)/8, $0x2020202020202020
  1475DATA  expandAVX512_28_mat2<>+0x20(SB)/8, $0x4040404040404040
  1476DATA  expandAVX512_28_mat2<>+0x28(SB)/8, $0x4040404080808080
  1477DATA  expandAVX512_28_mat2<>+0x30(SB)/8, $0x8080808080808080
  1478DATA  expandAVX512_28_mat2<>+0x38(SB)/8, $0x0101010101010101
  1479
  // expandAVX512_28_inShuf3: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_28 uses it as the byte-index operand of its fourth VPERMB over the input read from (AX).
  1480GLOBL expandAVX512_28_inShuf3<>(SB), RODATA, $0x40
  1481DATA  expandAVX512_28_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
  1482DATA  expandAVX512_28_inShuf3<>+0x08(SB)/8, $0xffff040404030303
  1483DATA  expandAVX512_28_inShuf3<>+0x10(SB)/8, $0xffffffffffffff04
  1484DATA  expandAVX512_28_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
  1485DATA  expandAVX512_28_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  1486DATA  expandAVX512_28_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  1487DATA  expandAVX512_28_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1488DATA  expandAVX512_28_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1489
  // expandAVX512_28_mat3: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf3 permute in expandAVX512_28.
  // Only the first three lanes are non-zero.
  1490GLOBL expandAVX512_28_mat3<>(SB), RODATA, $0x40
  1491DATA  expandAVX512_28_mat3<>+0x00(SB)/8, $0x0101010102020202
  1492DATA  expandAVX512_28_mat3<>+0x08(SB)/8, $0x0202020202020202
  1493DATA  expandAVX512_28_mat3<>+0x10(SB)/8, $0x0808080808080808
  1494DATA  expandAVX512_28_mat3<>+0x18(SB)/8, $0x0000000000000000
  1495DATA  expandAVX512_28_mat3<>+0x20(SB)/8, $0x0000000000000000
  1496DATA  expandAVX512_28_mat3<>+0x28(SB)/8, $0x0000000000000000
  1497DATA  expandAVX512_28_mat3<>+0x30(SB)/8, $0x0000000000000000
  1498DATA  expandAVX512_28_mat3<>+0x38(SB)/8, $0x0000000000000000
  1499
  // expandAVX512_28_outShufLo: 64 byte indices for the unmasked VPERMI2B in expandAVX512_28,
  // which selects from the stage-0 (Z0) and stage-1 (Z2) results to form the Z1 output.
  1500GLOBL expandAVX512_28_outShufLo(SB), RODATA, $0x40
  1501DATA  expandAVX512_28_outShufLo+0x00(SB)/8, $0x1812111008020100
  1502DATA  expandAVX512_28_outShufLo+0x08(SB)/8, $0x31302a2928201a19
  1503DATA  expandAVX512_28_outShufLo+0x10(SB)/8, $0x4a49484241403832
  1504DATA  expandAVX512_28_outShufLo+0x18(SB)/8, $0x090504035a595850
  1505DATA  expandAVX512_28_outShufLo+0x20(SB)/8, $0x2b211d1c1b151413
  1506DATA  expandAVX512_28_outShufLo+0x28(SB)/8, $0x4443393534332d2c
  1507DATA  expandAVX512_28_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b45
  1508DATA  expandAVX512_28_outShufLo+0x38(SB)/8, $0x1e6817160a600706
  1509
  // expandAVX512_28_outShufHi0: 64 byte indices used by expandAVX512_28 for its masked VPERMI2B.Z
  // over Z2/Z3. The 0xff entries sit exactly at the byte positions cleared in K1 mask 0xdf87fffff87fffff.
  1510GLOBL expandAVX512_28_outShufHi0(SB), RODATA, $0x40
  1511DATA  expandAVX512_28_outShufHi0+0x00(SB)/8, $0x4948424140383130
  1512DATA  expandAVX512_28_outShufHi0+0x08(SB)/8, $0x6261605a5958504a
  1513DATA  expandAVX512_28_outShufHi0+0x10(SB)/8, $0xff7a797872717068
  1514DATA  expandAVX512_28_outShufHi0+0x18(SB)/8, $0x4339343332ffffff
  1515DATA  expandAVX512_28_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b4544
  1516DATA  expandAVX512_28_outShufHi0+0x28(SB)/8, $0x757473696564635d
  1517DATA  expandAVX512_28_outShufHi0+0x30(SB)/8, $0x35ffffffff7d7c7b
  1518DATA  expandAVX512_28_outShufHi0+0x38(SB)/8, $0x4f4eff47463a3736
  1519
  // expandAVX512_28_outShufHi1: 64 byte indices used by expandAVX512_28 for its masked VPERMB.Z over Z4.
  // Only the positions enabled by K1 mask 0x2078000007800000 hold real indices; all others are 0xff.
  1520GLOBL expandAVX512_28_outShufHi1(SB), RODATA, $0x40
  1521DATA  expandAVX512_28_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1522DATA  expandAVX512_28_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1523DATA  expandAVX512_28_outShufHi1+0x10(SB)/8, $0x00ffffffffffffff
  1524DATA  expandAVX512_28_outShufHi1+0x18(SB)/8, $0xffffffffff0a0908
  1525DATA  expandAVX512_28_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1526DATA  expandAVX512_28_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  1527DATA  expandAVX512_28_outShufHi1+0x30(SB)/8, $0xff0d0c0b01ffffff
  1528DATA  expandAVX512_28_outShufHi1+0x38(SB)/8, $0xffff10ffffffffff
  1529
  // expandAVX512_28: frameless NOSPLIT routine ($0-0). Reads 64 bytes from (AX), runs four
  // VPERMB + VGF2P8AFFINEQB stages over that input, then merges the stage results into two
  // 64-byte vectors left in Z1 and Z2. Overwrites Z0-Z7, K1 and AX.
  // NOTE(review): the register contract (pointer in AX, results in Z1/Z2) is inferred from this
  // body only; confirm against the caller.
  // NOTE(review): the outShuf* symbols are referenced without the <> suffix used by the
  // inShuf*/mat* tables, so they are not file-local; confirm the generator intends that.
  1530TEXT expandAVX512_28<>(SB), NOSPLIT, $0-0
  1531	VMOVDQU64 expandAVX512_28_inShuf0<>(SB), Z0
  1532	VMOVDQU64 expandAVX512_28_inShuf1<>(SB), Z2
  1533	VMOVDQU64 expandAVX512_28_inShuf2<>(SB), Z3
  1534	VMOVDQU64 expandAVX512_28_inShuf3<>(SB), Z4
  1535	VMOVDQU64 expandAVX512_28_outShufLo(SB), Z1
  1536	VMOVDQU64 expandAVX512_28_outShufHi0(SB), Z5
  1537	VMOVDQU64 expandAVX512_28_outShufHi1(SB), Z6
  1538	VMOVDQU64 (AX), Z7 // 64-byte input vector
  1539	VPERMB Z7, Z0, Z0 // stage 0: pick input bytes by inShuf0 ...
  1540	VGF2P8AFFINEQB $0, expandAVX512_28_mat0<>(SB), Z0, Z0 // ... then apply the per-lane bit matrix mat0
  1541	VPERMB Z7, Z2, Z2 // stage 1 (inShuf1 / mat1)
  1542	VGF2P8AFFINEQB $0, expandAVX512_28_mat1<>(SB), Z2, Z2
  1543	VPERMB Z7, Z3, Z3 // stage 2 (inShuf2 / mat2)
  1544	VGF2P8AFFINEQB $0, expandAVX512_28_mat2<>(SB), Z3, Z3
  1545	VPERMB Z7, Z4, Z4 // stage 3 (inShuf3 / mat3)
  1546	VGF2P8AFFINEQB $0, expandAVX512_28_mat3<>(SB), Z4, Z4
  1547	VPERMI2B Z2, Z0, Z1 // first result: Z1 = bytes of Z0/Z2 selected by outShufLo
  1548	MOVQ $0xdf87fffff87fffff, AX // AX is reused as scratch; the input pointer is gone from here on
  1549	KMOVQ AX, K1
  1550	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes of Z2/Z3 selected by outShufHi0, zero where K1 is clear
  1551	MOVQ $0x2078000007800000, AX // bitwise complement of the previous mask
  1552	KMOVQ AX, K1
  1553	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes of Z4 selected by outShufHi1, zero where K1 is clear
  1554	VPORQ Z0, Z5, Z2 // second result: Z2 = Z5 | Z0 (the two masked halves are disjoint)
  1555	RET
  1556
  // expandAVX512_30_inShuf0: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_30 uses it as the byte-index operand of its first VPERMB over the input read from (AX).
  1557GLOBL expandAVX512_30_inShuf0<>(SB), RODATA, $0x40
  1558DATA  expandAVX512_30_inShuf0<>+0x00(SB)/8, $0x0202010101000000
  1559DATA  expandAVX512_30_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
  1560DATA  expandAVX512_30_inShuf0<>+0x10(SB)/8, $0xffff010101000000
  1561DATA  expandAVX512_30_inShuf0<>+0x18(SB)/8, $0xffffffffffff0100
  1562DATA  expandAVX512_30_inShuf0<>+0x20(SB)/8, $0xffff010101000000
  1563DATA  expandAVX512_30_inShuf0<>+0x28(SB)/8, $0xffffffffffff0100
  1564DATA  expandAVX512_30_inShuf0<>+0x30(SB)/8, $0xffff010101000000
  1565DATA  expandAVX512_30_inShuf0<>+0x38(SB)/8, $0xffff010101000000
  1566
  // expandAVX512_30_mat0: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf0 permute in expandAVX512_30.
  1567GLOBL expandAVX512_30_mat0<>(SB), RODATA, $0x40
  1568DATA  expandAVX512_30_mat0<>+0x00(SB)/8, $0x0101010101010101
  1569DATA  expandAVX512_30_mat0<>+0x08(SB)/8, $0x0101010101010202
  1570DATA  expandAVX512_30_mat0<>+0x10(SB)/8, $0x0202020202020202
  1571DATA  expandAVX512_30_mat0<>+0x18(SB)/8, $0x0202020204040404
  1572DATA  expandAVX512_30_mat0<>+0x20(SB)/8, $0x0404040404040404
  1573DATA  expandAVX512_30_mat0<>+0x28(SB)/8, $0x0404080808080808
  1574DATA  expandAVX512_30_mat0<>+0x30(SB)/8, $0x0808080808080808
  1575DATA  expandAVX512_30_mat0<>+0x38(SB)/8, $0x1010101010101010
  1576
  // expandAVX512_30_inShuf1: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_30 uses it as the byte-index operand of its second VPERMB over the input read from (AX).
  1577GLOBL expandAVX512_30_inShuf1<>(SB), RODATA, $0x40
  1578DATA  expandAVX512_30_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
  1579DATA  expandAVX512_30_inShuf1<>+0x08(SB)/8, $0xffff010101000000
  1580DATA  expandAVX512_30_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
  1581DATA  expandAVX512_30_inShuf1<>+0x18(SB)/8, $0xffff010101000000
  1582DATA  expandAVX512_30_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
  1583DATA  expandAVX512_30_inShuf1<>+0x28(SB)/8, $0xffff010101000000
  1584DATA  expandAVX512_30_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
  1585DATA  expandAVX512_30_inShuf1<>+0x38(SB)/8, $0x0404030303020202
  1586
  // expandAVX512_30_mat1: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf1 permute in expandAVX512_30.
  1587GLOBL expandAVX512_30_mat1<>(SB), RODATA, $0x40
  1588DATA  expandAVX512_30_mat1<>+0x00(SB)/8, $0x1010101010102020
  1589DATA  expandAVX512_30_mat1<>+0x08(SB)/8, $0x2020202020202020
  1590DATA  expandAVX512_30_mat1<>+0x10(SB)/8, $0x2020202040404040
  1591DATA  expandAVX512_30_mat1<>+0x18(SB)/8, $0x4040404040404040
  1592DATA  expandAVX512_30_mat1<>+0x20(SB)/8, $0x4040808080808080
  1593DATA  expandAVX512_30_mat1<>+0x28(SB)/8, $0x8080808080808080
  1594DATA  expandAVX512_30_mat1<>+0x30(SB)/8, $0x0101010101010101
  1595DATA  expandAVX512_30_mat1<>+0x38(SB)/8, $0x0202020202020202
  1596
  // expandAVX512_30_inShuf2: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_30 uses it as the byte-index operand of its third VPERMB over the input read from (AX).
  1597GLOBL expandAVX512_30_inShuf2<>(SB), RODATA, $0x40
  1598DATA  expandAVX512_30_inShuf2<>+0x00(SB)/8, $0xffffffffff040302
  1599DATA  expandAVX512_30_inShuf2<>+0x08(SB)/8, $0xffff030303020202
  1600DATA  expandAVX512_30_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
  1601DATA  expandAVX512_30_inShuf2<>+0x18(SB)/8, $0xffff030303020202
  1602DATA  expandAVX512_30_inShuf2<>+0x20(SB)/8, $0xffff030303020202
  1603DATA  expandAVX512_30_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
  1604DATA  expandAVX512_30_inShuf2<>+0x30(SB)/8, $0xffff030303020202
  1605DATA  expandAVX512_30_inShuf2<>+0x38(SB)/8, $0xffffffffffff0302
  1606
  // expandAVX512_30_mat2: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf2 permute in expandAVX512_30.
  1607GLOBL expandAVX512_30_mat2<>(SB), RODATA, $0x40
  1608DATA  expandAVX512_30_mat2<>+0x00(SB)/8, $0x0202020204040404
  1609DATA  expandAVX512_30_mat2<>+0x08(SB)/8, $0x0404040404040404
  1610DATA  expandAVX512_30_mat2<>+0x10(SB)/8, $0x0404080808080808
  1611DATA  expandAVX512_30_mat2<>+0x18(SB)/8, $0x0808080808080808
  1612DATA  expandAVX512_30_mat2<>+0x20(SB)/8, $0x1010101010101010
  1613DATA  expandAVX512_30_mat2<>+0x28(SB)/8, $0x1010101010102020
  1614DATA  expandAVX512_30_mat2<>+0x30(SB)/8, $0x2020202020202020
  1615DATA  expandAVX512_30_mat2<>+0x38(SB)/8, $0x2020202040404040
  1616
  // expandAVX512_30_inShuf3: file-local 64-byte read-only table (eight 8-byte words).
  // expandAVX512_30 uses it as the byte-index operand of its fourth VPERMB over the input read from (AX).
  1617GLOBL expandAVX512_30_inShuf3<>(SB), RODATA, $0x40
  1618DATA  expandAVX512_30_inShuf3<>+0x00(SB)/8, $0xffff030303020202
  1619DATA  expandAVX512_30_inShuf3<>+0x08(SB)/8, $0xffffffffffff0302
  1620DATA  expandAVX512_30_inShuf3<>+0x10(SB)/8, $0xffff030303020202
  1621DATA  expandAVX512_30_inShuf3<>+0x18(SB)/8, $0xffff040404030303
  1622DATA  expandAVX512_30_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
  1623DATA  expandAVX512_30_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
  1624DATA  expandAVX512_30_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1625DATA  expandAVX512_30_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1626
  // expandAVX512_30_mat3: file-local 64-byte table, one 8-byte matrix per 64-bit lane.
  // Memory operand of the VGF2P8AFFINEQB that follows the inShuf3 permute in expandAVX512_30.
  // The last two lanes are all-zero matrices.
  1627GLOBL expandAVX512_30_mat3<>(SB), RODATA, $0x40
  1628DATA  expandAVX512_30_mat3<>+0x00(SB)/8, $0x4040404040404040
  1629DATA  expandAVX512_30_mat3<>+0x08(SB)/8, $0x4040808080808080
  1630DATA  expandAVX512_30_mat3<>+0x10(SB)/8, $0x8080808080808080
  1631DATA  expandAVX512_30_mat3<>+0x18(SB)/8, $0x0101010101010101
  1632DATA  expandAVX512_30_mat3<>+0x20(SB)/8, $0x0101010101010202
  1633DATA  expandAVX512_30_mat3<>+0x28(SB)/8, $0x0202020202020202
  1634DATA  expandAVX512_30_mat3<>+0x30(SB)/8, $0x0000000000000000
  1635DATA  expandAVX512_30_mat3<>+0x38(SB)/8, $0x0000000000000000
  1636
  // expandAVX512_30_outShufLo: 64 byte indices for the unmasked VPERMI2B in expandAVX512_30,
  // which selects from the stage-0 (Z0) and stage-1 (Z2) results to form the Z1 output.
  1637GLOBL expandAVX512_30_outShufLo(SB), RODATA, $0x40
  1638DATA  expandAVX512_30_outShufLo+0x00(SB)/8, $0x1812111008020100
  1639DATA  expandAVX512_30_outShufLo+0x08(SB)/8, $0x3832313028222120
  1640DATA  expandAVX512_30_outShufLo+0x10(SB)/8, $0x58504a4948403a39
  1641DATA  expandAVX512_30_outShufLo+0x18(SB)/8, $0x04036a6968605a59
  1642DATA  expandAVX512_30_outShufLo+0x20(SB)/8, $0x2423191514130905
  1643DATA  expandAVX512_30_outShufLo+0x28(SB)/8, $0x3d3c3b3534332925
  1644DATA  expandAVX512_30_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b41
  1645DATA  expandAVX512_30_outShufLo+0x38(SB)/8, $0x0a7007066d6c6b61
  1646
  // expandAVX512_30_outShufHi0: 64 byte indices used by expandAVX512_30 for its masked VPERMI2B.Z
  // over Z2/Z3. The 0xff entries sit exactly at the byte positions cleared in K1 mask 0xb001ffffc007ffff.
  1647GLOBL expandAVX512_30_outShufHi0(SB), RODATA, $0x40
  1648DATA  expandAVX512_30_outShufHi0+0x00(SB)/8, $0x504a4948403a3938
  1649DATA  expandAVX512_30_outShufHi0+0x08(SB)/8, $0x70686261605a5958
  1650DATA  expandAVX512_30_outShufHi0+0x10(SB)/8, $0xffffffffff787271
  1651DATA  expandAVX512_30_outShufHi0+0x18(SB)/8, $0x3c3bffffffffffff
  1652DATA  expandAVX512_30_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b413d
  1653DATA  expandAVX512_30_outShufHi0+0x28(SB)/8, $0x757473696564635d
  1654DATA  expandAVX512_30_outShufHi0+0x30(SB)/8, $0xffffffffffffff79
  1655DATA  expandAVX512_30_outShufHi0+0x38(SB)/8, $0x42ff3f3effffffff
  1656
  // expandAVX512_30_outShufHi1: 64 byte indices used by expandAVX512_30 for its masked VPERMB.Z over Z4.
  // Only the positions enabled by K1 mask 0x4ffe00003ff80000 hold real indices; all others are 0xff.
  1657GLOBL expandAVX512_30_outShufHi1(SB), RODATA, $0x40
  1658DATA  expandAVX512_30_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1659DATA  expandAVX512_30_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1660DATA  expandAVX512_30_outShufHi1+0x10(SB)/8, $0x1008020100ffffff
  1661DATA  expandAVX512_30_outShufHi1+0x18(SB)/8, $0xffff201a19181211
  1662DATA  expandAVX512_30_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1663DATA  expandAVX512_30_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  1664DATA  expandAVX512_30_outShufHi1+0x30(SB)/8, $0x15141309050403ff
  1665DATA  expandAVX512_30_outShufHi1+0x38(SB)/8, $0xff28ffff211d1c1b
  1666
  // expandAVX512_30: frameless NOSPLIT routine ($0-0). Reads 64 bytes from (AX), runs four
  // VPERMB + VGF2P8AFFINEQB stages over that input, then merges the stage results into two
  // 64-byte vectors left in Z1 and Z2. Overwrites Z0-Z7, K1 and AX.
  // NOTE(review): the register contract (pointer in AX, results in Z1/Z2) is inferred from this
  // body only; confirm against the caller.
  // NOTE(review): the outShuf* symbols are referenced without the <> suffix used by the
  // inShuf*/mat* tables, so they are not file-local; confirm the generator intends that.
  1667TEXT expandAVX512_30<>(SB), NOSPLIT, $0-0
  1668	VMOVDQU64 expandAVX512_30_inShuf0<>(SB), Z0
  1669	VMOVDQU64 expandAVX512_30_inShuf1<>(SB), Z2
  1670	VMOVDQU64 expandAVX512_30_inShuf2<>(SB), Z3
  1671	VMOVDQU64 expandAVX512_30_inShuf3<>(SB), Z4
  1672	VMOVDQU64 expandAVX512_30_outShufLo(SB), Z1
  1673	VMOVDQU64 expandAVX512_30_outShufHi0(SB), Z5
  1674	VMOVDQU64 expandAVX512_30_outShufHi1(SB), Z6
  1675	VMOVDQU64 (AX), Z7 // 64-byte input vector
  1676	VPERMB Z7, Z0, Z0 // stage 0: pick input bytes by inShuf0 ...
  1677	VGF2P8AFFINEQB $0, expandAVX512_30_mat0<>(SB), Z0, Z0 // ... then apply the per-lane bit matrix mat0
  1678	VPERMB Z7, Z2, Z2 // stage 1 (inShuf1 / mat1)
  1679	VGF2P8AFFINEQB $0, expandAVX512_30_mat1<>(SB), Z2, Z2
  1680	VPERMB Z7, Z3, Z3 // stage 2 (inShuf2 / mat2)
  1681	VGF2P8AFFINEQB $0, expandAVX512_30_mat2<>(SB), Z3, Z3
  1682	VPERMB Z7, Z4, Z4 // stage 3 (inShuf3 / mat3)
  1683	VGF2P8AFFINEQB $0, expandAVX512_30_mat3<>(SB), Z4, Z4
  1684	VPERMI2B Z2, Z0, Z1 // first result: Z1 = bytes of Z0/Z2 selected by outShufLo
  1685	MOVQ $0xb001ffffc007ffff, AX // AX is reused as scratch; the input pointer is gone from here on
  1686	KMOVQ AX, K1
  1687	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes of Z2/Z3 selected by outShufHi0, zero where K1 is clear
  1688	MOVQ $0x4ffe00003ff80000, AX // bitwise complement of the previous mask
  1689	KMOVQ AX, K1
  1690	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes of Z4 selected by outShufHi1, zero where K1 is clear
  1691	VPORQ Z0, Z5, Z2 // second result: Z2 = Z5 | Z0 (the two masked halves are disjoint)
  1692	RET
  1693
  // expandAVX512_32_inShuf0: file-local 64-byte read-only table; every 8-byte word repeats index 0
  // four times and index 1 four times. expandAVX512_32 uses it as the byte-index operand of its first VPERMB.
  1694GLOBL expandAVX512_32_inShuf0<>(SB), RODATA, $0x40
  1695DATA  expandAVX512_32_inShuf0<>+0x00(SB)/8, $0x0101010100000000
  1696DATA  expandAVX512_32_inShuf0<>+0x08(SB)/8, $0x0101010100000000
  1697DATA  expandAVX512_32_inShuf0<>+0x10(SB)/8, $0x0101010100000000
  1698DATA  expandAVX512_32_inShuf0<>+0x18(SB)/8, $0x0101010100000000
  1699DATA  expandAVX512_32_inShuf0<>+0x20(SB)/8, $0x0101010100000000
  1700DATA  expandAVX512_32_inShuf0<>+0x28(SB)/8, $0x0101010100000000
  1701DATA  expandAVX512_32_inShuf0<>+0x30(SB)/8, $0x0101010100000000
  1702DATA  expandAVX512_32_inShuf0<>+0x38(SB)/8, $0x0101010100000000
  1703
  // expandAVX512_32_mat0: file-local 64-byte table, one 8-byte matrix per 64-bit lane; lane i
  // is the byte (1 << i) repeated eight times. expandAVX512_32 loads it into Z1 and uses it as
  // the VGF2P8AFFINEQB matrix for both of its stages.
  1704GLOBL expandAVX512_32_mat0<>(SB), RODATA, $0x40
  1705DATA  expandAVX512_32_mat0<>+0x00(SB)/8, $0x0101010101010101
  1706DATA  expandAVX512_32_mat0<>+0x08(SB)/8, $0x0202020202020202
  1707DATA  expandAVX512_32_mat0<>+0x10(SB)/8, $0x0404040404040404
  1708DATA  expandAVX512_32_mat0<>+0x18(SB)/8, $0x0808080808080808
  1709DATA  expandAVX512_32_mat0<>+0x20(SB)/8, $0x1010101010101010
  1710DATA  expandAVX512_32_mat0<>+0x28(SB)/8, $0x2020202020202020
  1711DATA  expandAVX512_32_mat0<>+0x30(SB)/8, $0x4040404040404040
  1712DATA  expandAVX512_32_mat0<>+0x38(SB)/8, $0x8080808080808080
  1713
  1714GLOBL expandAVX512_32_inShuf1<>(SB), RODATA, $0x40
  1715DATA  expandAVX512_32_inShuf1<>+0x00(SB)/8, $0x0303030302020202
  1716DATA  expandAVX512_32_inShuf1<>+0x08(SB)/8, $0x0303030302020202
  1717DATA  expandAVX512_32_inShuf1<>+0x10(SB)/8, $0x0303030302020202
  1718DATA  expandAVX512_32_inShuf1<>+0x18(SB)/8, $0x0303030302020202
  1719DATA  expandAVX512_32_inShuf1<>+0x20(SB)/8, $0x0303030302020202
  1720DATA  expandAVX512_32_inShuf1<>+0x28(SB)/8, $0x0303030302020202
  1721DATA  expandAVX512_32_inShuf1<>+0x30(SB)/8, $0x0303030302020202
  1722DATA  expandAVX512_32_inShuf1<>+0x38(SB)/8, $0x0303030302020202
  1723
  1724GLOBL expandAVX512_32_outShufLo(SB), RODATA, $0x40
  1725DATA  expandAVX512_32_outShufLo+0x00(SB)/8, $0x0b0a090803020100
  1726DATA  expandAVX512_32_outShufLo+0x08(SB)/8, $0x1b1a191813121110
  1727DATA  expandAVX512_32_outShufLo+0x10(SB)/8, $0x2b2a292823222120
  1728DATA  expandAVX512_32_outShufLo+0x18(SB)/8, $0x3b3a393833323130
  1729DATA  expandAVX512_32_outShufLo+0x20(SB)/8, $0x0f0e0d0c07060504
  1730DATA  expandAVX512_32_outShufLo+0x28(SB)/8, $0x1f1e1d1c17161514
  1731DATA  expandAVX512_32_outShufLo+0x30(SB)/8, $0x2f2e2d2c27262524
  1732DATA  expandAVX512_32_outShufLo+0x38(SB)/8, $0x3f3e3d3c37363534
  1733
  1734TEXT expandAVX512_32<>(SB), NOSPLIT, $0-0
      	// expandAVX512_32 loads 64 bytes from (AX) and leaves two 64-byte
      	// results in Z1 and Z2. Each of the first 32 input bits (input bytes
      	// 0-3) is replicated into 32 consecutive output bits: Z1 holds the
      	// expansion of input bytes 0-1 and Z2 that of input bytes 2-3.
      	// Clobbers Z0-Z4.
      	// NOTE(review): the register convention (pointer in AX, results in
      	// Z1/Z2) is read off this body; the caller is not visible here.
  1735	VMOVDQU64 expandAVX512_32_inShuf0<>(SB), Z0
  1736	VMOVDQU64 expandAVX512_32_mat0<>(SB), Z1
  1737	VMOVDQU64 expandAVX512_32_inShuf1<>(SB), Z2
  1738	VMOVDQU64 expandAVX512_32_outShufLo(SB), Z3
  1739	VMOVDQU64 (AX), Z4 // Z4 = 64 input bytes
  1740	VPERMB Z4, Z0, Z0 // Z0 = input bytes gathered by inShuf0 (bytes 0 and 1)
  1741	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // each byte in qword j -> 0xff/0x00 from its bit j
  1742	VPERMB Z4, Z2, Z2 // Z2 = input bytes gathered by inShuf1 (bytes 2 and 3)
  1743	VGF2P8AFFINEQB $0, Z1, Z2, Z2 // same bit-to-byte spread, reusing mat0 in Z1
  1744	VPERMB Z0, Z3, Z1 // Z1 = Z0 reordered by outShufLo (overwrites the matrix copy)
  1745	VPERMB Z2, Z3, Z2 // Z2 = Z2 reordered by the same index table
  1746	RET
  1747
  1748GLOBL expandAVX512_36_inShuf0<>(SB), RODATA, $0x40 // VPERMB index table: selects the input byte for each position of intermediate vector 0.
      // VPERMB uses only the low 6 bits of an index, so 0xff entries read input
      // byte 63. NOTE(review): presumably don't-care positions; confirm in mkasm.go.
  1749DATA  expandAVX512_36_inShuf0<>+0x00(SB)/8, $0x0101010100000000
  1750DATA  expandAVX512_36_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
  1751DATA  expandAVX512_36_inShuf0<>+0x10(SB)/8, $0x0101010100000000
  1752DATA  expandAVX512_36_inShuf0<>+0x18(SB)/8, $0x0101010100000000
  1753DATA  expandAVX512_36_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
  1754DATA  expandAVX512_36_inShuf0<>+0x28(SB)/8, $0x0101010100000000
  1755DATA  expandAVX512_36_inShuf0<>+0x30(SB)/8, $0x0101010100000000
  1756DATA  expandAVX512_36_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
  1757
  1758GLOBL expandAVX512_36_mat0<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 0: one 8x8 bit matrix per qword.
      // Every row has exactly one bit set, so each output bit copies one input
      // bit. Qwords that mix two row values (e.g. 0x0101010102020202) make the
      // low and high nibble of each output byte copy two different input bits.
  1759DATA  expandAVX512_36_mat0<>+0x00(SB)/8, $0x0101010101010101
  1760DATA  expandAVX512_36_mat0<>+0x08(SB)/8, $0x0101010102020202
  1761DATA  expandAVX512_36_mat0<>+0x10(SB)/8, $0x0202020202020202
  1762DATA  expandAVX512_36_mat0<>+0x18(SB)/8, $0x0404040404040404
  1763DATA  expandAVX512_36_mat0<>+0x20(SB)/8, $0x0404040408080808
  1764DATA  expandAVX512_36_mat0<>+0x28(SB)/8, $0x0808080808080808
  1765DATA  expandAVX512_36_mat0<>+0x30(SB)/8, $0x1010101010101010
  1766DATA  expandAVX512_36_mat0<>+0x38(SB)/8, $0x1010101020202020
  1767
  1768GLOBL expandAVX512_36_inShuf1<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 1 (0xff entries as in inShuf0).
  1769DATA  expandAVX512_36_inShuf1<>+0x00(SB)/8, $0x0101010100000000
  1770DATA  expandAVX512_36_inShuf1<>+0x08(SB)/8, $0xffffff0100000000
  1771DATA  expandAVX512_36_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
  1772DATA  expandAVX512_36_inShuf1<>+0x18(SB)/8, $0xffffffff00000000
  1773DATA  expandAVX512_36_inShuf1<>+0x20(SB)/8, $0xff02020202010101
  1774DATA  expandAVX512_36_inShuf1<>+0x28(SB)/8, $0xffffffffffff0201
  1775DATA  expandAVX512_36_inShuf1<>+0x30(SB)/8, $0x0202020201010101
  1776DATA  expandAVX512_36_inShuf1<>+0x38(SB)/8, $0x0303030302020202
  1777
  1778GLOBL expandAVX512_36_mat1<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 1: per-qword 8x8 bit matrices, one bit set per row.
  1779DATA  expandAVX512_36_mat1<>+0x00(SB)/8, $0x2020202020202020
  1780DATA  expandAVX512_36_mat1<>+0x08(SB)/8, $0x4040404040404040
  1781DATA  expandAVX512_36_mat1<>+0x10(SB)/8, $0x4040404080808080
  1782DATA  expandAVX512_36_mat1<>+0x18(SB)/8, $0x8080808080808080
  1783DATA  expandAVX512_36_mat1<>+0x20(SB)/8, $0x4040404040404040
  1784DATA  expandAVX512_36_mat1<>+0x28(SB)/8, $0x4040404080808080
  1785DATA  expandAVX512_36_mat1<>+0x30(SB)/8, $0x8080808080808080
  1786DATA  expandAVX512_36_mat1<>+0x38(SB)/8, $0x0101010101010101
  1787
  1788GLOBL expandAVX512_36_inShuf2<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 2 (0xff entries as in inShuf0).
  1789DATA  expandAVX512_36_inShuf2<>+0x00(SB)/8, $0xffffffffffff0302
  1790DATA  expandAVX512_36_inShuf2<>+0x08(SB)/8, $0x0303030302020202
  1791DATA  expandAVX512_36_inShuf2<>+0x10(SB)/8, $0x0303030302020202
  1792DATA  expandAVX512_36_inShuf2<>+0x18(SB)/8, $0xffffffffffff0302
  1793DATA  expandAVX512_36_inShuf2<>+0x20(SB)/8, $0x0303030302020202
  1794DATA  expandAVX512_36_inShuf2<>+0x28(SB)/8, $0xffff030302020202
  1795DATA  expandAVX512_36_inShuf2<>+0x30(SB)/8, $0xffffffffffffff02
  1796DATA  expandAVX512_36_inShuf2<>+0x38(SB)/8, $0xffffffff02020202
  1797
  1798GLOBL expandAVX512_36_mat2<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 2: per-qword 8x8 bit matrices, one bit set per row.
  1799DATA  expandAVX512_36_mat2<>+0x00(SB)/8, $0x0101010102020202
  1800DATA  expandAVX512_36_mat2<>+0x08(SB)/8, $0x0202020202020202
  1801DATA  expandAVX512_36_mat2<>+0x10(SB)/8, $0x0404040404040404
  1802DATA  expandAVX512_36_mat2<>+0x18(SB)/8, $0x0404040408080808
  1803DATA  expandAVX512_36_mat2<>+0x20(SB)/8, $0x0808080808080808
  1804DATA  expandAVX512_36_mat2<>+0x28(SB)/8, $0x1010101010101010
  1805DATA  expandAVX512_36_mat2<>+0x30(SB)/8, $0x1010101020202020
  1806DATA  expandAVX512_36_mat2<>+0x38(SB)/8, $0x2020202020202020
  1807
  1808GLOBL expandAVX512_36_outShufLo(SB), RODATA, $0x40 // VPERMI2B index table for the Z1 result.
      // Indices 0x00-0x3f select bytes of intermediate vector 0, 0x40-0x7f
      // select bytes of intermediate vector 1.
      // NOTE(review): declared without <>, so not file-local like the
      // inShuf/mat tables; confirm intended (change belongs in mkasm.go).
  1809DATA  expandAVX512_36_outShufLo+0x00(SB)/8, $0x1211100803020100
  1810DATA  expandAVX512_36_outShufLo+0x08(SB)/8, $0x2928201b1a191813
  1811DATA  expandAVX512_36_outShufLo+0x10(SB)/8, $0x4038333231302b2a
  1812DATA  expandAVX512_36_outShufLo+0x18(SB)/8, $0x504b4a4948434241
  1813DATA  expandAVX512_36_outShufLo+0x20(SB)/8, $0x070605045b5a5958
  1814DATA  expandAVX512_36_outShufLo+0x28(SB)/8, $0x1e1d1c1716151409
  1815DATA  expandAVX512_36_outShufLo+0x30(SB)/8, $0x35342f2e2d2c211f
  1816DATA  expandAVX512_36_outShufLo+0x38(SB)/8, $0x4c47464544393736
  1817
  1818GLOBL expandAVX512_36_outShufHi(SB), RODATA, $0x40 // VPERMI2B index table for the Z2 result.
      // Indices 0x00-0x3f select bytes of intermediate vector 1, 0x40-0x7f
      // select bytes of intermediate vector 2. Also declared without <>.
  1819DATA  expandAVX512_36_outShufHi+0x00(SB)/8, $0x3332313028222120
  1820DATA  expandAVX512_36_outShufHi+0x08(SB)/8, $0x4a4948403b3a3938
  1821DATA  expandAVX512_36_outShufHi+0x10(SB)/8, $0x616058535251504b
  1822DATA  expandAVX512_36_outShufHi+0x18(SB)/8, $0x78706b6a69686362
  1823DATA  expandAVX512_36_outShufHi+0x20(SB)/8, $0x29262524237b7a79
  1824DATA  expandAVX512_36_outShufHi+0x28(SB)/8, $0x3f3e3d3c37363534
  1825DATA  expandAVX512_36_outShufHi+0x30(SB)/8, $0x5655544f4e4d4c41
  1826DATA  expandAVX512_36_outShufHi+0x38(SB)/8, $0x6d6c676665645957
  1827
  1828TEXT expandAVX512_36<>(SB), NOSPLIT, $0-0
      	// expandAVX512_36 loads 64 bytes from (AX) and leaves two 64-byte
      	// results in Z1 and Z2. It builds three intermediate vectors (VPERMB
      	// gather of input bytes, then a per-qword 8x8 bit-matrix multiply) and
      	// merges them pairwise with VPERMI2B. Clobbers Z0-Z5.
      	// NOTE(review): by its name this presumably replicates each input bit
      	// 36 times; that factor was not re-derived from the tables here. The
      	// register convention (AX in, Z1/Z2 out) is read off this body only.
  1829	VMOVDQU64 expandAVX512_36_inShuf0<>(SB), Z0
  1830	VMOVDQU64 expandAVX512_36_inShuf1<>(SB), Z3
  1831	VMOVDQU64 expandAVX512_36_inShuf2<>(SB), Z4
  1832	VMOVDQU64 expandAVX512_36_outShufLo(SB), Z1
  1833	VMOVDQU64 expandAVX512_36_outShufHi(SB), Z2
  1834	VMOVDQU64 (AX), Z5 // Z5 = 64 input bytes
  1835	VPERMB Z5, Z0, Z0 // gather input bytes for vector 0
  1836	VGF2P8AFFINEQB $0, expandAVX512_36_mat0<>(SB), Z0, Z0 // spread selected bits across each byte
  1837	VPERMB Z5, Z3, Z3 // gather input bytes for vector 1
  1838	VGF2P8AFFINEQB $0, expandAVX512_36_mat1<>(SB), Z3, Z3
  1839	VPERMB Z5, Z4, Z4 // gather input bytes for vector 2
  1840	VGF2P8AFFINEQB $0, expandAVX512_36_mat2<>(SB), Z4, Z4
  1841	VPERMI2B Z3, Z0, Z1 // Z1 = bytes picked from Z0:Z3 by the outShufLo indices held in Z1
  1842	VPERMI2B Z4, Z3, Z2 // Z2 = bytes picked from Z3:Z4 by the outShufHi indices held in Z2
  1843	RET
  1844
  1845GLOBL expandAVX512_40_inShuf0<>(SB), RODATA, $0x40 // VPERMB index table: selects the input byte for each position of intermediate vector 0.
      // VPERMB uses only the low 6 bits of an index, so 0xff entries read input
      // byte 63. NOTE(review): presumably don't-care positions; confirm in mkasm.go.
  1846DATA  expandAVX512_40_inShuf0<>+0x00(SB)/8, $0x0101010000000000
  1847DATA  expandAVX512_40_inShuf0<>+0x08(SB)/8, $0x0101010000000000
  1848DATA  expandAVX512_40_inShuf0<>+0x10(SB)/8, $0x0101010000000000
  1849DATA  expandAVX512_40_inShuf0<>+0x18(SB)/8, $0x0101010000000000
  1850DATA  expandAVX512_40_inShuf0<>+0x20(SB)/8, $0x0101010000000000
  1851DATA  expandAVX512_40_inShuf0<>+0x28(SB)/8, $0xffffff0000000000
  1852DATA  expandAVX512_40_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
  1853DATA  expandAVX512_40_inShuf0<>+0x38(SB)/8, $0xffffff0000000000
  1854
  1855GLOBL expandAVX512_40_mat0<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 0: one 8x8 bit matrix per qword.
      // Matrix j has bit j set in every row, so each byte in qword j becomes
      // 0xff if its bit j is set and 0x00 otherwise.
  1856DATA  expandAVX512_40_mat0<>+0x00(SB)/8, $0x0101010101010101
  1857DATA  expandAVX512_40_mat0<>+0x08(SB)/8, $0x0202020202020202
  1858DATA  expandAVX512_40_mat0<>+0x10(SB)/8, $0x0404040404040404
  1859DATA  expandAVX512_40_mat0<>+0x18(SB)/8, $0x0808080808080808
  1860DATA  expandAVX512_40_mat0<>+0x20(SB)/8, $0x1010101010101010
  1861DATA  expandAVX512_40_mat0<>+0x28(SB)/8, $0x2020202020202020
  1862DATA  expandAVX512_40_mat0<>+0x30(SB)/8, $0x4040404040404040
  1863DATA  expandAVX512_40_mat0<>+0x38(SB)/8, $0x8080808080808080
  1864
  1865GLOBL expandAVX512_40_inShuf1<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 1 (0xff entries as in inShuf0).
  1866DATA  expandAVX512_40_inShuf1<>+0x00(SB)/8, $0xffffffffffff0101
  1867DATA  expandAVX512_40_inShuf1<>+0x08(SB)/8, $0xffffffffffff0101
  1868DATA  expandAVX512_40_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
  1869DATA  expandAVX512_40_inShuf1<>+0x18(SB)/8, $0xffffffffffff0101
  1870DATA  expandAVX512_40_inShuf1<>+0x20(SB)/8, $0xffffffffffffff01
  1871DATA  expandAVX512_40_inShuf1<>+0x28(SB)/8, $0xffff020202020201
  1872DATA  expandAVX512_40_inShuf1<>+0x30(SB)/8, $0x0202020101010101
  1873DATA  expandAVX512_40_inShuf1<>+0x38(SB)/8, $0x0202020101010101
  1874
  1875GLOBL expandAVX512_40_mat1<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 1: per-qword matrices, all rows of a qword select the same single bit.
  1876DATA  expandAVX512_40_mat1<>+0x00(SB)/8, $0x0101010101010101
  1877DATA  expandAVX512_40_mat1<>+0x08(SB)/8, $0x0202020202020202
  1878DATA  expandAVX512_40_mat1<>+0x10(SB)/8, $0x0404040404040404
  1879DATA  expandAVX512_40_mat1<>+0x18(SB)/8, $0x0808080808080808
  1880DATA  expandAVX512_40_mat1<>+0x20(SB)/8, $0x1010101010101010
  1881DATA  expandAVX512_40_mat1<>+0x28(SB)/8, $0x1010101010101010
  1882DATA  expandAVX512_40_mat1<>+0x30(SB)/8, $0x2020202020202020
  1883DATA  expandAVX512_40_mat1<>+0x38(SB)/8, $0x4040404040404040
  1884
  1885GLOBL expandAVX512_40_inShuf2<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 2 (0xff entries as in inShuf0).
  1886DATA  expandAVX512_40_inShuf2<>+0x00(SB)/8, $0x0202020101010101
  1887DATA  expandAVX512_40_inShuf2<>+0x08(SB)/8, $0x0303030202020202
  1888DATA  expandAVX512_40_inShuf2<>+0x10(SB)/8, $0x0303030202020202
  1889DATA  expandAVX512_40_inShuf2<>+0x18(SB)/8, $0xffffff0202020202
  1890DATA  expandAVX512_40_inShuf2<>+0x20(SB)/8, $0xffffff0202020202
  1891DATA  expandAVX512_40_inShuf2<>+0x28(SB)/8, $0xffffffffffff0202
  1892DATA  expandAVX512_40_inShuf2<>+0x30(SB)/8, $0xffffffffffff0202
  1893DATA  expandAVX512_40_inShuf2<>+0x38(SB)/8, $0xffffffffffff0202
  1894
  1895GLOBL expandAVX512_40_mat2<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 2: per-qword matrices, all rows of a qword select the same single bit.
  1896DATA  expandAVX512_40_mat2<>+0x00(SB)/8, $0x8080808080808080
  1897DATA  expandAVX512_40_mat2<>+0x08(SB)/8, $0x0101010101010101
  1898DATA  expandAVX512_40_mat2<>+0x10(SB)/8, $0x0202020202020202
  1899DATA  expandAVX512_40_mat2<>+0x18(SB)/8, $0x0404040404040404
  1900DATA  expandAVX512_40_mat2<>+0x20(SB)/8, $0x0808080808080808
  1901DATA  expandAVX512_40_mat2<>+0x28(SB)/8, $0x2020202020202020
  1902DATA  expandAVX512_40_mat2<>+0x30(SB)/8, $0x4040404040404040
  1903DATA  expandAVX512_40_mat2<>+0x38(SB)/8, $0x8080808080808080
  1904
  1905GLOBL expandAVX512_40_inShuf3<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 3.
      // Only the first two bytes name a real input byte (3); every other
      // entry is 0xff and falls in a qword whose mat3 matrix is zero.
  1906DATA  expandAVX512_40_inShuf3<>+0x00(SB)/8, $0xffffffffffff0303
  1907DATA  expandAVX512_40_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
  1908DATA  expandAVX512_40_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
  1909DATA  expandAVX512_40_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
  1910DATA  expandAVX512_40_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  1911DATA  expandAVX512_40_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  1912DATA  expandAVX512_40_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1913DATA  expandAVX512_40_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1914
  1915GLOBL expandAVX512_40_mat3<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 3.
      // Qword 0 spreads bit 0 of each byte to 0xff/0x00; the all-zero
      // matrices in qwords 1-7 force those 56 bytes of vector 3 to zero.
  1916DATA  expandAVX512_40_mat3<>+0x00(SB)/8, $0x0101010101010101
  1917DATA  expandAVX512_40_mat3<>+0x08(SB)/8, $0x0000000000000000
  1918DATA  expandAVX512_40_mat3<>+0x10(SB)/8, $0x0000000000000000
  1919DATA  expandAVX512_40_mat3<>+0x18(SB)/8, $0x0000000000000000
  1920DATA  expandAVX512_40_mat3<>+0x20(SB)/8, $0x0000000000000000
  1921DATA  expandAVX512_40_mat3<>+0x28(SB)/8, $0x0000000000000000
  1922DATA  expandAVX512_40_mat3<>+0x30(SB)/8, $0x0000000000000000
  1923DATA  expandAVX512_40_mat3<>+0x38(SB)/8, $0x0000000000000000
  1924
  1925GLOBL expandAVX512_40_outShufLo(SB), RODATA, $0x40 // VPERMI2B index table for the Z1 result.
      // Indices 0x00-0x3f select bytes of intermediate vector 0, 0x40-0x7f
      // select bytes of intermediate vector 1.
      // NOTE(review): declared without <>, so not file-local like the
      // inShuf/mat tables; confirm intended (change belongs in mkasm.go).
  1926DATA  expandAVX512_40_outShufLo+0x00(SB)/8, $0x0a09080403020100
  1927DATA  expandAVX512_40_outShufLo+0x08(SB)/8, $0x1814131211100c0b
  1928DATA  expandAVX512_40_outShufLo+0x10(SB)/8, $0x232221201c1b1a19
  1929DATA  expandAVX512_40_outShufLo+0x18(SB)/8, $0x31302c2b2a292824
  1930DATA  expandAVX512_40_outShufLo+0x20(SB)/8, $0x3c3b3a3938343332
  1931DATA  expandAVX512_40_outShufLo+0x28(SB)/8, $0x0f0e0d4140070605
  1932DATA  expandAVX512_40_outShufLo+0x30(SB)/8, $0x1d51501716154948
  1933DATA  expandAVX512_40_outShufLo+0x38(SB)/8, $0x6027262559581f1e
  1934
  1935GLOBL expandAVX512_40_outShufHi0(SB), RODATA, $0x40 // Zero-masked VPERMI2B index table for the Z2 result.
      // Indices 0x00-0x3f select from intermediate vector 1, 0x40-0x7f from
      // vector 2. The two 0xff entries (byte positions 59 and 60) are exactly
      // the positions cleared by the 0xe7ffffffffffffff mask in the TEXT
      // below; outShufHi1 supplies those bytes. Also declared without <>.
  1936DATA  expandAVX512_40_outShufHi0+0x00(SB)/8, $0x3938343332313028
  1937DATA  expandAVX512_40_outShufHi0+0x08(SB)/8, $0x44434241403c3b3a
  1938DATA  expandAVX512_40_outShufHi0+0x10(SB)/8, $0x5251504c4b4a4948
  1939DATA  expandAVX512_40_outShufHi0+0x18(SB)/8, $0x605c5b5a59585453
  1940DATA  expandAVX512_40_outShufHi0+0x20(SB)/8, $0x2c2b2a2964636261
  1941DATA  expandAVX512_40_outShufHi0+0x28(SB)/8, $0x3e3d69683736352d
  1942DATA  expandAVX512_40_outShufHi0+0x30(SB)/8, $0x797847464571703f
  1943DATA  expandAVX512_40_outShufHi0+0x38(SB)/8, $0x575655ffff4f4e4d
  1944
  1945GLOBL expandAVX512_40_outShufHi1(SB), RODATA, $0x40 // Zero-masked VPERMB index table into intermediate vector 3.
      // Only byte positions 59 and 60 carry real indices (0 and 1), matching
      // the 0x1800000000000000 mask; all other positions are masked to zero.
      // Also declared without <>.
  1946DATA  expandAVX512_40_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1947DATA  expandAVX512_40_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1948DATA  expandAVX512_40_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  1949DATA  expandAVX512_40_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  1950DATA  expandAVX512_40_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1951DATA  expandAVX512_40_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  1952DATA  expandAVX512_40_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
  1953DATA  expandAVX512_40_outShufHi1+0x38(SB)/8, $0xffffff0100ffffff
  1954
  1955TEXT expandAVX512_40<>(SB), NOSPLIT, $0-0
      	// expandAVX512_40 loads 64 bytes from (AX) and leaves two 64-byte
      	// results in Z1 and Z2. It builds four intermediate vectors (VPERMB
      	// gather of input bytes, then a per-qword 8x8 bit-matrix multiply).
      	// Z1 is picked from vectors 0 and 1. Z2 is the OR of two zero-masked
      	// permutes under complementary K1 masks: one over vectors 1 and 2,
      	// one over vector 3.
      	// Clobbers Z0-Z7, K1 and AX (the input pointer is overwritten with
      	// the mask constants).
      	// NOTE(review): by its name this presumably replicates each input bit
      	// 40 times; that factor was not re-derived from the tables here. The
      	// register convention (AX in, Z1/Z2 out) is read off this body only.
  1956	VMOVDQU64 expandAVX512_40_inShuf0<>(SB), Z0
  1957	VMOVDQU64 expandAVX512_40_inShuf1<>(SB), Z2
  1958	VMOVDQU64 expandAVX512_40_inShuf2<>(SB), Z3
  1959	VMOVDQU64 expandAVX512_40_inShuf3<>(SB), Z4
  1960	VMOVDQU64 expandAVX512_40_outShufLo(SB), Z1
  1961	VMOVDQU64 expandAVX512_40_outShufHi0(SB), Z5
  1962	VMOVDQU64 expandAVX512_40_outShufHi1(SB), Z6
  1963	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes; last use of AX as a pointer
  1964	VPERMB Z7, Z0, Z0 // gather input bytes for vector 0
  1965	VGF2P8AFFINEQB $0, expandAVX512_40_mat0<>(SB), Z0, Z0 // spread selected bits across each byte
  1966	VPERMB Z7, Z2, Z2 // gather input bytes for vector 1
  1967	VGF2P8AFFINEQB $0, expandAVX512_40_mat1<>(SB), Z2, Z2
  1968	VPERMB Z7, Z3, Z3 // gather input bytes for vector 2
  1969	VGF2P8AFFINEQB $0, expandAVX512_40_mat2<>(SB), Z3, Z3
  1970	VPERMB Z7, Z4, Z4 // gather input bytes for vector 3
  1971	VGF2P8AFFINEQB $0, expandAVX512_40_mat3<>(SB), Z4, Z4
  1972	VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from Z0:Z2 by the outShufLo indices held in Z1
  1973	MOVQ $0xe7ffffffffffffff, AX // every byte position except 59 and 60
  1974	KMOVQ AX, K1
  1975	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from Z2:Z3 by outShufHi0; masked-off bytes zeroed
  1976	MOVQ $0x1800000000000000, AX // complement of the previous mask: positions 59 and 60
  1977	KMOVQ AX, K1
  1978	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes of Z4 picked by outShufHi1; zero elsewhere
  1979	VPORQ Z0, Z5, Z2 // Z2 = merge of the two disjoint halves
  1980	RET
  1981
  1982GLOBL expandAVX512_44_inShuf0<>(SB), RODATA, $0x40 // VPERMB index table: selects the input byte for each position of intermediate vector 0.
      // VPERMB uses only the low 6 bits of an index, so 0xff entries read input
      // byte 63. NOTE(review): presumably don't-care positions; confirm in mkasm.go.
  1983DATA  expandAVX512_44_inShuf0<>+0x00(SB)/8, $0x0101010000000000
  1984DATA  expandAVX512_44_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
  1985DATA  expandAVX512_44_inShuf0<>+0x10(SB)/8, $0x0101010000000000
  1986DATA  expandAVX512_44_inShuf0<>+0x18(SB)/8, $0x0101010000000000
  1987DATA  expandAVX512_44_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
  1988DATA  expandAVX512_44_inShuf0<>+0x28(SB)/8, $0x0101010000000000
  1989DATA  expandAVX512_44_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
  1990DATA  expandAVX512_44_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
  1991
  1992GLOBL expandAVX512_44_mat0<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 0: one 8x8 bit matrix per qword.
      // Every row has exactly one bit set, so each output bit copies one input
      // bit. Qwords that mix two row values (e.g. 0x0101010102020202) make the
      // low and high nibble of each output byte copy two different input bits.
  1993DATA  expandAVX512_44_mat0<>+0x00(SB)/8, $0x0101010101010101
  1994DATA  expandAVX512_44_mat0<>+0x08(SB)/8, $0x0101010102020202
  1995DATA  expandAVX512_44_mat0<>+0x10(SB)/8, $0x0202020202020202
  1996DATA  expandAVX512_44_mat0<>+0x18(SB)/8, $0x0404040404040404
  1997DATA  expandAVX512_44_mat0<>+0x20(SB)/8, $0x0404040408080808
  1998DATA  expandAVX512_44_mat0<>+0x28(SB)/8, $0x0808080808080808
  1999DATA  expandAVX512_44_mat0<>+0x30(SB)/8, $0x1010101010101010
  2000DATA  expandAVX512_44_mat0<>+0x38(SB)/8, $0x1010101020202020
  2001
  2002GLOBL expandAVX512_44_inShuf1<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 1 (0xff entries as in inShuf0).
  2003DATA  expandAVX512_44_inShuf1<>+0x00(SB)/8, $0xffffff0000000000
  2004DATA  expandAVX512_44_inShuf1<>+0x08(SB)/8, $0xffffff0000000000
  2005DATA  expandAVX512_44_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
  2006DATA  expandAVX512_44_inShuf1<>+0x18(SB)/8, $0xffffff0000000000
  2007DATA  expandAVX512_44_inShuf1<>+0x20(SB)/8, $0xffffffffffff0101
  2008DATA  expandAVX512_44_inShuf1<>+0x28(SB)/8, $0xffffffffffff0101
  2009DATA  expandAVX512_44_inShuf1<>+0x30(SB)/8, $0xffffffffffff0101
  2010DATA  expandAVX512_44_inShuf1<>+0x38(SB)/8, $0xff02020202020101
  2011
  2012GLOBL expandAVX512_44_mat1<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 1: per-qword 8x8 bit matrices, one bit set per row.
  2013DATA  expandAVX512_44_mat1<>+0x00(SB)/8, $0x2020202020202020
  2014DATA  expandAVX512_44_mat1<>+0x08(SB)/8, $0x4040404040404040
  2015DATA  expandAVX512_44_mat1<>+0x10(SB)/8, $0x4040404080808080
  2016DATA  expandAVX512_44_mat1<>+0x18(SB)/8, $0x8080808080808080
  2017DATA  expandAVX512_44_mat1<>+0x20(SB)/8, $0x0101010101010101
  2018DATA  expandAVX512_44_mat1<>+0x28(SB)/8, $0x0202020202020202
  2019DATA  expandAVX512_44_mat1<>+0x30(SB)/8, $0x0404040404040404
  2020DATA  expandAVX512_44_mat1<>+0x38(SB)/8, $0x0808080808080808
  2021
  2022GLOBL expandAVX512_44_inShuf2<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 2 (0xff entries as in inShuf0).
  2023DATA  expandAVX512_44_inShuf2<>+0x00(SB)/8, $0x0202020101010101
  2024DATA  expandAVX512_44_inShuf2<>+0x08(SB)/8, $0xffffffffffff0201
  2025DATA  expandAVX512_44_inShuf2<>+0x10(SB)/8, $0x0202020101010101
  2026DATA  expandAVX512_44_inShuf2<>+0x18(SB)/8, $0x0202020101010101
  2027DATA  expandAVX512_44_inShuf2<>+0x20(SB)/8, $0xffffffffffff0201
  2028DATA  expandAVX512_44_inShuf2<>+0x28(SB)/8, $0xffff020101010101
  2029DATA  expandAVX512_44_inShuf2<>+0x30(SB)/8, $0xffffff0202020202
  2030DATA  expandAVX512_44_inShuf2<>+0x38(SB)/8, $0xffffffffffffff02
  2031
  2032GLOBL expandAVX512_44_mat2<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 2: per-qword 8x8 bit matrices, one bit set per row.
  2033DATA  expandAVX512_44_mat2<>+0x00(SB)/8, $0x1010101010101010
  2034DATA  expandAVX512_44_mat2<>+0x08(SB)/8, $0x1010101020202020
  2035DATA  expandAVX512_44_mat2<>+0x10(SB)/8, $0x2020202020202020
  2036DATA  expandAVX512_44_mat2<>+0x18(SB)/8, $0x4040404040404040
  2037DATA  expandAVX512_44_mat2<>+0x20(SB)/8, $0x4040404080808080
  2038DATA  expandAVX512_44_mat2<>+0x28(SB)/8, $0x8080808080808080
  2039DATA  expandAVX512_44_mat2<>+0x30(SB)/8, $0x0101010101010101
  2040DATA  expandAVX512_44_mat2<>+0x38(SB)/8, $0x0101010102020202
  2041
  2042GLOBL expandAVX512_44_inShuf3<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 3.
      // Qwords 6 and 7 are entirely 0xff and pair with zero matrices in mat3.
  2043DATA  expandAVX512_44_inShuf3<>+0x00(SB)/8, $0xffffff0202020202
  2044DATA  expandAVX512_44_inShuf3<>+0x08(SB)/8, $0xffffff0202020202
  2045DATA  expandAVX512_44_inShuf3<>+0x10(SB)/8, $0xffffffffffffff02
  2046DATA  expandAVX512_44_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
  2047DATA  expandAVX512_44_inShuf3<>+0x20(SB)/8, $0xffffffffffff0202
  2048DATA  expandAVX512_44_inShuf3<>+0x28(SB)/8, $0xffffffffffff0202
  2049DATA  expandAVX512_44_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  2050DATA  expandAVX512_44_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  2051
  2052GLOBL expandAVX512_44_mat3<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 3.
      // One bit set per row in qwords 0-5; the all-zero matrices in qwords 6
      // and 7 force those 16 bytes of vector 3 to zero.
  2053DATA  expandAVX512_44_mat3<>+0x00(SB)/8, $0x0202020202020202
  2054DATA  expandAVX512_44_mat3<>+0x08(SB)/8, $0x0404040404040404
  2055DATA  expandAVX512_44_mat3<>+0x10(SB)/8, $0x0404040408080808
  2056DATA  expandAVX512_44_mat3<>+0x18(SB)/8, $0x1010101010101010
  2057DATA  expandAVX512_44_mat3<>+0x20(SB)/8, $0x2020202020202020
  2058DATA  expandAVX512_44_mat3<>+0x28(SB)/8, $0x4040404040404040
  2059DATA  expandAVX512_44_mat3<>+0x30(SB)/8, $0x0000000000000000
  2060DATA  expandAVX512_44_mat3<>+0x38(SB)/8, $0x0000000000000000
  2061
  2062GLOBL expandAVX512_44_outShufLo(SB), RODATA, $0x40 // VPERMI2B index table for the Z1 result.
      // Indices 0x00-0x3f select bytes of intermediate vector 0, 0x40-0x7f
      // select bytes of intermediate vector 1.
      // NOTE(review): declared without <>, so not file-local like the
      // inShuf/mat tables; confirm intended (change belongs in mkasm.go).
  2063DATA  expandAVX512_44_outShufLo+0x00(SB)/8, $0x1110080403020100
  2064DATA  expandAVX512_44_outShufLo+0x08(SB)/8, $0x1c1b1a1918141312
  2065DATA  expandAVX512_44_outShufLo+0x10(SB)/8, $0x31302c2b2a292820
  2066DATA  expandAVX512_44_outShufLo+0x18(SB)/8, $0x4342414038343332
  2067DATA  expandAVX512_44_outShufLo+0x20(SB)/8, $0x58504c4b4a494844
  2068DATA  expandAVX512_44_outShufLo+0x28(SB)/8, $0x600706055c5b5a59
  2069DATA  expandAVX512_44_outShufLo+0x30(SB)/8, $0x1d69681716150961
  2070DATA  expandAVX512_44_outShufLo+0x38(SB)/8, $0x2f2e2d2171701f1e
  2071
  2072GLOBL expandAVX512_44_outShufHi0(SB), RODATA, $0x40 // Zero-masked VPERMI2B index table for the Z2 result.
      // Indices 0x00-0x3f select from intermediate vector 1, 0x40-0x7f from
      // vector 2. The 0xff entries sit exactly at the byte positions cleared
      // by the 0xce79fe003fffffff mask in the TEXT below; outShufHi1 supplies
      // those bytes. Also declared without <>.
  2073DATA  expandAVX512_44_outShufHi0+0x00(SB)/8, $0x4844434241403938
  2074DATA  expandAVX512_44_outShufHi0+0x08(SB)/8, $0x5a59585453525150
  2075DATA  expandAVX512_44_outShufHi0+0x10(SB)/8, $0x6c6b6a6968605c5b
  2076DATA  expandAVX512_44_outShufHi0+0x18(SB)/8, $0xffff787473727170
  2077DATA  expandAVX512_44_outShufHi0+0x20(SB)/8, $0xffffffffffffffff
  2078DATA  expandAVX512_44_outShufHi0+0x28(SB)/8, $0x46453e3d3c3b3aff
  2079DATA  expandAVX512_44_outShufHi0+0x30(SB)/8, $0xff57565549ffff47
  2080DATA  expandAVX512_44_outShufHi0+0x38(SB)/8, $0x6d61ffff5f5e5dff
  2081
  2082GLOBL expandAVX512_44_outShufHi1(SB), RODATA, $0x40 // Zero-masked VPERMB index table into intermediate vector 3.
      // Real indices appear only at the positions enabled by the
      // 0x318601ffc0000000 mask; 0xff positions are masked to zero.
      // Also declared without <>.
  2083DATA  expandAVX512_44_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  2084DATA  expandAVX512_44_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  2085DATA  expandAVX512_44_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  2086DATA  expandAVX512_44_outShufHi1+0x18(SB)/8, $0x0100ffffffffffff
  2087DATA  expandAVX512_44_outShufHi1+0x20(SB)/8, $0x0c0b0a0908040302
  2088DATA  expandAVX512_44_outShufHi1+0x28(SB)/8, $0xffffffffffffff10
  2089DATA  expandAVX512_44_outShufHi1+0x30(SB)/8, $0x20ffffffff1918ff
  2090DATA  expandAVX512_44_outShufHi1+0x38(SB)/8, $0xffff2928ffffff21
  2091
  2092TEXT expandAVX512_44<>(SB), NOSPLIT, $0-0
      	// expandAVX512_44 loads 64 bytes from (AX) and leaves two 64-byte
      	// results in Z1 and Z2. It builds four intermediate vectors (VPERMB
      	// gather of input bytes, then a per-qword 8x8 bit-matrix multiply).
      	// Z1 is picked from vectors 0 and 1. Z2 is the OR of two zero-masked
      	// permutes under complementary K1 masks: one over vectors 1 and 2,
      	// one over vector 3.
      	// Clobbers Z0-Z7, K1 and AX (the input pointer is overwritten with
      	// the mask constants).
      	// NOTE(review): by its name this presumably replicates each input bit
      	// 44 times; that factor was not re-derived from the tables here. The
      	// register convention (AX in, Z1/Z2 out) is read off this body only.
  2093	VMOVDQU64 expandAVX512_44_inShuf0<>(SB), Z0
  2094	VMOVDQU64 expandAVX512_44_inShuf1<>(SB), Z2
  2095	VMOVDQU64 expandAVX512_44_inShuf2<>(SB), Z3
  2096	VMOVDQU64 expandAVX512_44_inShuf3<>(SB), Z4
  2097	VMOVDQU64 expandAVX512_44_outShufLo(SB), Z1
  2098	VMOVDQU64 expandAVX512_44_outShufHi0(SB), Z5
  2099	VMOVDQU64 expandAVX512_44_outShufHi1(SB), Z6
  2100	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes; last use of AX as a pointer
  2101	VPERMB Z7, Z0, Z0 // gather input bytes for vector 0
  2102	VGF2P8AFFINEQB $0, expandAVX512_44_mat0<>(SB), Z0, Z0 // spread selected bits across each byte
  2103	VPERMB Z7, Z2, Z2 // gather input bytes for vector 1
  2104	VGF2P8AFFINEQB $0, expandAVX512_44_mat1<>(SB), Z2, Z2
  2105	VPERMB Z7, Z3, Z3 // gather input bytes for vector 2
  2106	VGF2P8AFFINEQB $0, expandAVX512_44_mat2<>(SB), Z3, Z3
  2107	VPERMB Z7, Z4, Z4 // gather input bytes for vector 3
  2108	VGF2P8AFFINEQB $0, expandAVX512_44_mat3<>(SB), Z4, Z4
  2109	VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from Z0:Z2 by the outShufLo indices held in Z1
  2110	MOVQ $0xce79fe003fffffff, AX // byte positions whose outShufHi0 entry is a real index
  2111	KMOVQ AX, K1
  2112	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from Z2:Z3 by outShufHi0; masked-off bytes zeroed
  2113	MOVQ $0x318601ffc0000000, AX // bitwise complement of the previous mask
  2114	KMOVQ AX, K1
  2115	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes of Z4 picked by outShufHi1; zero elsewhere
  2116	VPORQ Z0, Z5, Z2 // Z2 = merge of the two disjoint halves
  2117	RET
  2118
  2119GLOBL expandAVX512_48_inShuf0<>(SB), RODATA, $0x40 // VPERMB index table: selects the input byte for each position of intermediate vector 0.
      // VPERMB uses only the low 6 bits of an index, so 0xff entries read input
      // byte 63. NOTE(review): presumably don't-care positions; confirm in mkasm.go.
  2120DATA  expandAVX512_48_inShuf0<>+0x00(SB)/8, $0x0101000000000000
  2121DATA  expandAVX512_48_inShuf0<>+0x08(SB)/8, $0x0101000000000000
  2122DATA  expandAVX512_48_inShuf0<>+0x10(SB)/8, $0x0101000000000000
  2123DATA  expandAVX512_48_inShuf0<>+0x18(SB)/8, $0xffff000000000000
  2124DATA  expandAVX512_48_inShuf0<>+0x20(SB)/8, $0xffff000000000000
  2125DATA  expandAVX512_48_inShuf0<>+0x28(SB)/8, $0xffff000000000000
  2126DATA  expandAVX512_48_inShuf0<>+0x30(SB)/8, $0xffff000000000000
  2127DATA  expandAVX512_48_inShuf0<>+0x38(SB)/8, $0xffff000000000000
  2128
  2129GLOBL expandAVX512_48_mat0<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 0: one 8x8 bit matrix per qword.
      // Matrix j has bit j set in every row, so each byte in qword j becomes
      // 0xff if its bit j is set and 0x00 otherwise.
  2130DATA  expandAVX512_48_mat0<>+0x00(SB)/8, $0x0101010101010101
  2131DATA  expandAVX512_48_mat0<>+0x08(SB)/8, $0x0202020202020202
  2132DATA  expandAVX512_48_mat0<>+0x10(SB)/8, $0x0404040404040404
  2133DATA  expandAVX512_48_mat0<>+0x18(SB)/8, $0x0808080808080808
  2134DATA  expandAVX512_48_mat0<>+0x20(SB)/8, $0x1010101010101010
  2135DATA  expandAVX512_48_mat0<>+0x28(SB)/8, $0x2020202020202020
  2136DATA  expandAVX512_48_mat0<>+0x30(SB)/8, $0x4040404040404040
  2137DATA  expandAVX512_48_mat0<>+0x38(SB)/8, $0x8080808080808080
  2138
  2139GLOBL expandAVX512_48_inShuf1<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 1 (0xff entries as in inShuf0).
  2140DATA  expandAVX512_48_inShuf1<>+0x00(SB)/8, $0xffffffff01010101
  2141DATA  expandAVX512_48_inShuf1<>+0x08(SB)/8, $0xffffffff01010101
  2142DATA  expandAVX512_48_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
  2143DATA  expandAVX512_48_inShuf1<>+0x18(SB)/8, $0x0202020202020101
  2144DATA  expandAVX512_48_inShuf1<>+0x20(SB)/8, $0x0202010101010101
  2145DATA  expandAVX512_48_inShuf1<>+0x28(SB)/8, $0x0202010101010101
  2146DATA  expandAVX512_48_inShuf1<>+0x30(SB)/8, $0x0202010101010101
  2147DATA  expandAVX512_48_inShuf1<>+0x38(SB)/8, $0xffff010101010101
  2148
  2149GLOBL expandAVX512_48_mat1<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 1: per-qword matrices, all rows of a qword select the same single bit.
  2150DATA  expandAVX512_48_mat1<>+0x00(SB)/8, $0x0101010101010101
  2151DATA  expandAVX512_48_mat1<>+0x08(SB)/8, $0x0202020202020202
  2152DATA  expandAVX512_48_mat1<>+0x10(SB)/8, $0x0404040404040404
  2153DATA  expandAVX512_48_mat1<>+0x18(SB)/8, $0x0404040404040404
  2154DATA  expandAVX512_48_mat1<>+0x20(SB)/8, $0x0808080808080808
  2155DATA  expandAVX512_48_mat1<>+0x28(SB)/8, $0x1010101010101010
  2156DATA  expandAVX512_48_mat1<>+0x30(SB)/8, $0x2020202020202020
  2157DATA  expandAVX512_48_mat1<>+0x38(SB)/8, $0x4040404040404040
  2158
  2159GLOBL expandAVX512_48_inShuf2<>(SB), RODATA, $0x40 // VPERMB index table for intermediate vector 2.
      // Qwords 5-7 are entirely 0xff and pair with zero matrices in mat2.
  2160DATA  expandAVX512_48_inShuf2<>+0x00(SB)/8, $0xffff010101010101
  2161DATA  expandAVX512_48_inShuf2<>+0x08(SB)/8, $0xffff020202020202
  2162DATA  expandAVX512_48_inShuf2<>+0x10(SB)/8, $0xffff020202020202
  2163DATA  expandAVX512_48_inShuf2<>+0x18(SB)/8, $0xffffffff02020202
  2164DATA  expandAVX512_48_inShuf2<>+0x20(SB)/8, $0xffffffff02020202
  2165DATA  expandAVX512_48_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
  2166DATA  expandAVX512_48_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
  2167DATA  expandAVX512_48_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
  2168
  2169GLOBL expandAVX512_48_mat2<>(SB), RODATA, $0x40 // VGF2P8AFFINEQB operand for vector 2.
      // One bit per row in qwords 0-4; the all-zero matrices in qwords 5-7
      // force those 24 bytes of vector 2 to zero.
  2170DATA  expandAVX512_48_mat2<>+0x00(SB)/8, $0x8080808080808080
  2171DATA  expandAVX512_48_mat2<>+0x08(SB)/8, $0x0101010101010101
  2172DATA  expandAVX512_48_mat2<>+0x10(SB)/8, $0x0202020202020202
  2173DATA  expandAVX512_48_mat2<>+0x18(SB)/8, $0x0808080808080808
  2174DATA  expandAVX512_48_mat2<>+0x20(SB)/8, $0x1010101010101010
  2175DATA  expandAVX512_48_mat2<>+0x28(SB)/8, $0x0000000000000000
  2176DATA  expandAVX512_48_mat2<>+0x30(SB)/8, $0x0000000000000000
  2177DATA  expandAVX512_48_mat2<>+0x38(SB)/8, $0x0000000000000000
  2178
  2179GLOBL expandAVX512_48_outShufLo(SB), RODATA, $0x40 // VPERMI2B index table for the Z1 result.
      // Indices 0x00-0x3f select bytes of intermediate vector 0, 0x40-0x7f
      // select bytes of intermediate vector 1.
      // NOTE(review): declared without <>, so not file-local like the
      // inShuf/mat tables; confirm intended (change belongs in mkasm.go).
  2180DATA  expandAVX512_48_outShufLo+0x00(SB)/8, $0x0908050403020100
  2181DATA  expandAVX512_48_outShufLo+0x08(SB)/8, $0x131211100d0c0b0a
  2182DATA  expandAVX512_48_outShufLo+0x10(SB)/8, $0x1d1c1b1a19181514
  2183DATA  expandAVX512_48_outShufLo+0x18(SB)/8, $0x2928252423222120
  2184DATA  expandAVX512_48_outShufLo+0x20(SB)/8, $0x333231302d2c2b2a
  2185DATA  expandAVX512_48_outShufLo+0x28(SB)/8, $0x3d3c3b3a39383534
  2186DATA  expandAVX512_48_outShufLo+0x30(SB)/8, $0x0f0e434241400706
  2187DATA  expandAVX512_48_outShufLo+0x38(SB)/8, $0x515017164b4a4948
  2188
  2189GLOBL expandAVX512_48_outShufHi(SB), RODATA, $0x40 // VPERMI2B index table for the Z2 result.
      // Indices 0x00-0x3f select bytes of intermediate vector 1, 0x40-0x7f
      // select bytes of intermediate vector 2. Also declared without <>.
  2190DATA  expandAVX512_48_outShufHi+0x00(SB)/8, $0x2524232221201918
  2191DATA  expandAVX512_48_outShufHi+0x08(SB)/8, $0x31302d2c2b2a2928
  2192DATA  expandAVX512_48_outShufHi+0x10(SB)/8, $0x3b3a393835343332
  2193DATA  expandAVX512_48_outShufHi+0x18(SB)/8, $0x4544434241403d3c
  2194DATA  expandAVX512_48_outShufHi+0x20(SB)/8, $0x51504d4c4b4a4948
  2195DATA  expandAVX512_48_outShufHi+0x28(SB)/8, $0x1d1c1b1a55545352
  2196DATA  expandAVX512_48_outShufHi+0x30(SB)/8, $0x5b5a595827261f1e
  2197DATA  expandAVX512_48_outShufHi+0x38(SB)/8, $0x3736636261602f2e
  2198
  2199TEXT expandAVX512_48<>(SB), NOSPLIT, $0-0
      	// expandAVX512_48 loads 64 bytes from (AX) and leaves two 64-byte
      	// results in Z1 and Z2. It builds three intermediate vectors (VPERMB
      	// gather of input bytes, then a per-qword 8x8 bit-matrix multiply) and
      	// merges them pairwise with VPERMI2B. Clobbers Z0-Z5.
      	// NOTE(review): by its name this presumably replicates each input bit
      	// 48 times; that factor was not re-derived from the tables here. The
      	// register convention (AX in, Z1/Z2 out) is read off this body only.
  2200	VMOVDQU64 expandAVX512_48_inShuf0<>(SB), Z0
  2201	VMOVDQU64 expandAVX512_48_inShuf1<>(SB), Z3
  2202	VMOVDQU64 expandAVX512_48_inShuf2<>(SB), Z4
  2203	VMOVDQU64 expandAVX512_48_outShufLo(SB), Z1
  2204	VMOVDQU64 expandAVX512_48_outShufHi(SB), Z2
  2205	VMOVDQU64 (AX), Z5 // Z5 = 64 input bytes
  2206	VPERMB Z5, Z0, Z0 // gather input bytes for vector 0
  2207	VGF2P8AFFINEQB $0, expandAVX512_48_mat0<>(SB), Z0, Z0 // spread selected bits across each byte
  2208	VPERMB Z5, Z3, Z3 // gather input bytes for vector 1
  2209	VGF2P8AFFINEQB $0, expandAVX512_48_mat1<>(SB), Z3, Z3
  2210	VPERMB Z5, Z4, Z4 // gather input bytes for vector 2
  2211	VGF2P8AFFINEQB $0, expandAVX512_48_mat2<>(SB), Z4, Z4
  2212	VPERMI2B Z3, Z0, Z1 // Z1 = bytes picked from Z0:Z3 by the outShufLo indices held in Z1
  2213	VPERMI2B Z4, Z3, Z2 // Z2 = bytes picked from Z3:Z4 by the outShufHi indices held in Z2
  2214	RET
  2215
  2216GLOBL expandAVX512_52_inShuf0<>(SB), RODATA, $0x40
  2217DATA  expandAVX512_52_inShuf0<>+0x00(SB)/8, $0x0101000000000000
  2218DATA  expandAVX512_52_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
  2219DATA  expandAVX512_52_inShuf0<>+0x10(SB)/8, $0x0101000000000000
  2220DATA  expandAVX512_52_inShuf0<>+0x18(SB)/8, $0xffff000000000000
  2221DATA  expandAVX512_52_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
  2222DATA  expandAVX512_52_inShuf0<>+0x28(SB)/8, $0xffff000000000000
  2223DATA  expandAVX512_52_inShuf0<>+0x30(SB)/8, $0xffff000000000000
  2224DATA  expandAVX512_52_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
  2225
  2226GLOBL expandAVX512_52_mat0<>(SB), RODATA, $0x40
  2227DATA  expandAVX512_52_mat0<>+0x00(SB)/8, $0x0101010101010101
  2228DATA  expandAVX512_52_mat0<>+0x08(SB)/8, $0x0101010102020202
  2229DATA  expandAVX512_52_mat0<>+0x10(SB)/8, $0x0202020202020202
  2230DATA  expandAVX512_52_mat0<>+0x18(SB)/8, $0x0404040404040404
  2231DATA  expandAVX512_52_mat0<>+0x20(SB)/8, $0x0404040408080808
  2232DATA  expandAVX512_52_mat0<>+0x28(SB)/8, $0x0808080808080808
  2233DATA  expandAVX512_52_mat0<>+0x30(SB)/8, $0x1010101010101010
  2234DATA  expandAVX512_52_mat0<>+0x38(SB)/8, $0x1010101020202020
  2235
  2236GLOBL expandAVX512_52_inShuf1<>(SB), RODATA, $0x40
  2237DATA  expandAVX512_52_inShuf1<>+0x00(SB)/8, $0xffff000000000000
  2238DATA  expandAVX512_52_inShuf1<>+0x08(SB)/8, $0xffff000000000000
  2239DATA  expandAVX512_52_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
  2240DATA  expandAVX512_52_inShuf1<>+0x18(SB)/8, $0xffff000000000000
  2241DATA  expandAVX512_52_inShuf1<>+0x20(SB)/8, $0xffffffff01010101
  2242DATA  expandAVX512_52_inShuf1<>+0x28(SB)/8, $0xffffffffff010101
  2243DATA  expandAVX512_52_inShuf1<>+0x30(SB)/8, $0xff02020202020201
  2244DATA  expandAVX512_52_inShuf1<>+0x38(SB)/8, $0x0202010101010101
  2245
  2246GLOBL expandAVX512_52_mat1<>(SB), RODATA, $0x40
  2247DATA  expandAVX512_52_mat1<>+0x00(SB)/8, $0x2020202020202020
  2248DATA  expandAVX512_52_mat1<>+0x08(SB)/8, $0x4040404040404040
  2249DATA  expandAVX512_52_mat1<>+0x10(SB)/8, $0x4040404080808080
  2250DATA  expandAVX512_52_mat1<>+0x18(SB)/8, $0x8080808080808080
  2251DATA  expandAVX512_52_mat1<>+0x20(SB)/8, $0x0101010101010101
  2252DATA  expandAVX512_52_mat1<>+0x28(SB)/8, $0x0202020202020202
  2253DATA  expandAVX512_52_mat1<>+0x30(SB)/8, $0x0202020202020202
  2254DATA  expandAVX512_52_mat1<>+0x38(SB)/8, $0x0404040404040404
  2255
  2256GLOBL expandAVX512_52_inShuf2<>(SB), RODATA, $0x40
  2257DATA  expandAVX512_52_inShuf2<>+0x00(SB)/8, $0xffffffffffff0201
  2258DATA  expandAVX512_52_inShuf2<>+0x08(SB)/8, $0x0202010101010101
  2259DATA  expandAVX512_52_inShuf2<>+0x10(SB)/8, $0xffff010101010101
  2260DATA  expandAVX512_52_inShuf2<>+0x18(SB)/8, $0xffffffffffffff01
  2261DATA  expandAVX512_52_inShuf2<>+0x20(SB)/8, $0xffff010101010101
  2262DATA  expandAVX512_52_inShuf2<>+0x28(SB)/8, $0xffff010101010101
  2263DATA  expandAVX512_52_inShuf2<>+0x30(SB)/8, $0xffffffffffffff01
  2264DATA  expandAVX512_52_inShuf2<>+0x38(SB)/8, $0xffff010101010101
  2265
  // expandAVX512_52_mat2: 0x40-byte read-only, file-local (<>) table.
  // expandAVX512_52 uses it as the matrix operand of VGF2P8AFFINEQB for the
  // bytes gathered with expandAVX512_52_inShuf2; each 8-byte word below is
  // the bit matrix for one 64-bit lane of that vector.
  2266GLOBL expandAVX512_52_mat2<>(SB), RODATA, $0x40
  2267DATA  expandAVX512_52_mat2<>+0x00(SB)/8, $0x0404040408080808
  2268DATA  expandAVX512_52_mat2<>+0x08(SB)/8, $0x0808080808080808
  2269DATA  expandAVX512_52_mat2<>+0x10(SB)/8, $0x1010101010101010
  2270DATA  expandAVX512_52_mat2<>+0x18(SB)/8, $0x1010101020202020
  2271DATA  expandAVX512_52_mat2<>+0x20(SB)/8, $0x2020202020202020
  2272DATA  expandAVX512_52_mat2<>+0x28(SB)/8, $0x4040404040404040
  2273DATA  expandAVX512_52_mat2<>+0x30(SB)/8, $0x4040404080808080
  2274DATA  expandAVX512_52_mat2<>+0x38(SB)/8, $0x8080808080808080
  2275
  // expandAVX512_52_inShuf3: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_52 loads it into Z4 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX); the gathered
  // bytes are then transformed with expandAVX512_52_mat3.
  // The upper four words are all 0xff and the matching words of
  // expandAVX512_52_mat3 are zero.
  2276GLOBL expandAVX512_52_inShuf3<>(SB), RODATA, $0x40
  2277DATA  expandAVX512_52_inShuf3<>+0x00(SB)/8, $0xffff020202020202
  2278DATA  expandAVX512_52_inShuf3<>+0x08(SB)/8, $0xffffffffffffff02
  2279DATA  expandAVX512_52_inShuf3<>+0x10(SB)/8, $0xffffffff02020202
  2280DATA  expandAVX512_52_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
  2281DATA  expandAVX512_52_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  2282DATA  expandAVX512_52_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  2283DATA  expandAVX512_52_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  2284DATA  expandAVX512_52_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  2285
  // expandAVX512_52_mat3: 0x40-byte read-only, file-local (<>) table.
  // expandAVX512_52 uses it as the matrix operand of VGF2P8AFFINEQB for the
  // bytes gathered with expandAVX512_52_inShuf3; each 8-byte word below is
  // the bit matrix for one 64-bit lane of that vector.
  // The upper four words are zero matrices, so with the $0 constant term
  // used by the caller those lanes produce all-zero bytes.
  2286GLOBL expandAVX512_52_mat3<>(SB), RODATA, $0x40
  2287DATA  expandAVX512_52_mat3<>+0x00(SB)/8, $0x0101010101010101
  2288DATA  expandAVX512_52_mat3<>+0x08(SB)/8, $0x0101010102020202
  2289DATA  expandAVX512_52_mat3<>+0x10(SB)/8, $0x0404040404040404
  2290DATA  expandAVX512_52_mat3<>+0x18(SB)/8, $0x0808080808080808
  2291DATA  expandAVX512_52_mat3<>+0x20(SB)/8, $0x0000000000000000
  2292DATA  expandAVX512_52_mat3<>+0x28(SB)/8, $0x0000000000000000
  2293DATA  expandAVX512_52_mat3<>+0x30(SB)/8, $0x0000000000000000
  2294DATA  expandAVX512_52_mat3<>+0x38(SB)/8, $0x0000000000000000
  2295
  // expandAVX512_52_outShufLo: 0x40-byte read-only table of byte indices.
  // expandAVX512_52 loads it into Z1 and uses it as the index operand of
  // VPERMI2B, selecting bytes from the stage-0 (Z0) and stage-1 (Z2) results;
  // the instruction overwrites Z1 with the selected bytes.
  // NOTE(review): unlike the inShuf/mat tables, this symbol is declared
  // without the <> file-local suffix — confirm this is intended by mkasm.go.
  2296GLOBL expandAVX512_52_outShufLo(SB), RODATA, $0x40
  2297DATA  expandAVX512_52_outShufLo+0x00(SB)/8, $0x1008050403020100
  2298DATA  expandAVX512_52_outShufLo+0x08(SB)/8, $0x1a19181514131211
  2299DATA  expandAVX512_52_outShufLo+0x10(SB)/8, $0x2b2a2928201d1c1b
  2300DATA  expandAVX512_52_outShufLo+0x18(SB)/8, $0x3534333231302d2c
  2301DATA  expandAVX512_52_outShufLo+0x20(SB)/8, $0x4845444342414038
  2302DATA  expandAVX512_52_outShufLo+0x28(SB)/8, $0x5958504d4c4b4a49
  2303DATA  expandAVX512_52_outShufLo+0x30(SB)/8, $0x616007065d5c5b5a
  2304DATA  expandAVX512_52_outShufLo+0x38(SB)/8, $0x6a69681716096362
  2305
  // expandAVX512_52_outShufHi0: 0x40-byte read-only table of byte indices.
  // expandAVX512_52 loads it into Z5 and uses it as the index operand of a
  // K1-masked, zeroing VPERMI2B over the stage-1 (Z2) and stage-2 (Z3)
  // results. Bytes whose K1 bit is clear are zeroed, whatever index is stored.
  // NOTE(review): declared without the <> file-local suffix used by the
  // inShuf/mat tables — confirm this is intended by mkasm.go.
  2306GLOBL expandAVX512_52_outShufHi0(SB), RODATA, $0x40
  2307DATA  expandAVX512_52_outShufHi0+0x00(SB)/8, $0x403d3c3b3a393830
  2308DATA  expandAVX512_52_outShufHi0+0x08(SB)/8, $0x51504d4c4b4a4948
  2309DATA  expandAVX512_52_outShufHi0+0x10(SB)/8, $0x6261605855545352
  2310DATA  expandAVX512_52_outShufHi0+0x18(SB)/8, $0x6c6b6a6968656463
  2311DATA  expandAVX512_52_outShufHi0+0x20(SB)/8, $0x7d7c7b7a7978706d
  2312DATA  expandAVX512_52_outShufHi0+0x28(SB)/8, $0x31ffffffffffffff
  2313DATA  expandAVX512_52_outShufHi0+0x30(SB)/8, $0xff3f3e3635343332
  2314DATA  expandAVX512_52_outShufHi0+0x38(SB)/8, $0xffff4f4e41ffffff
  2315
  // expandAVX512_52_outShufHi1: 0x40-byte read-only table of byte indices.
  // expandAVX512_52 loads it into Z6 and uses it as the index operand of a
  // K1-masked, zeroing VPERMB over the stage-3 result (Z4). The mask used
  // there is the complement of the one used with expandAVX512_52_outShufHi0.
  // NOTE(review): declared without the <> file-local suffix used by the
  // inShuf/mat tables — confirm this is intended by mkasm.go.
  2316GLOBL expandAVX512_52_outShufHi1(SB), RODATA, $0x40
  2317DATA  expandAVX512_52_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  2318DATA  expandAVX512_52_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  2319DATA  expandAVX512_52_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  2320DATA  expandAVX512_52_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  2321DATA  expandAVX512_52_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  2322DATA  expandAVX512_52_outShufHi1+0x28(SB)/8, $0xff08050403020100
  2323DATA  expandAVX512_52_outShufHi1+0x30(SB)/8, $0x10ffffffffffffff
  2324DATA  expandAVX512_52_outShufHi1+0x38(SB)/8, $0x1918ffffff131211
  2325
  // expandAVX512_52 reads 64 bytes from the address in AX and leaves two
  // 64-byte results in Z1 and Z2; it writes nothing to memory.
  // Its address is stored at ·gcExpandersAVX512+0xb8.
  //
  // Declared NOSPLIT with frame size 0 and argument size 0 ($0-0); the input
  // pointer arrives in AX.
  // Clobbers: AX (reused as scratch for the mask constants), K1, Z0-Z7.
  // NOTE(review): presumably the caller consumes Z1 as the low half and Z2 as
  // the high half of the expanded bitmap, and "52" is the expansion factor —
  // confirm against the caller and mkasm.go.
  2326TEXT expandAVX512_52<>(SB), NOSPLIT, $0-0
      	// Load the four input-shuffle index tables (Z0, Z2, Z3, Z4) and the
      	// three output-shuffle index tables (Z1, Z5, Z6).
  2327	VMOVDQU64 expandAVX512_52_inShuf0<>(SB), Z0
  2328	VMOVDQU64 expandAVX512_52_inShuf1<>(SB), Z2
  2329	VMOVDQU64 expandAVX512_52_inShuf2<>(SB), Z3
  2330	VMOVDQU64 expandAVX512_52_inShuf3<>(SB), Z4
  2331	VMOVDQU64 expandAVX512_52_outShufLo(SB), Z1
  2332	VMOVDQU64 expandAVX512_52_outShufHi0(SB), Z5
  2333	VMOVDQU64 expandAVX512_52_outShufHi1(SB), Z6
      	// Z7 = the 64 input bytes.
  2334	VMOVDQU64 (AX), Z7
      	// Stages 0-3: gather input bytes by index (VPERMB), then apply the
      	// per-64-bit-lane bit matrix with a zero constant term (VGF2P8AFFINEQB $0).
  2335	VPERMB Z7, Z0, Z0
  2336	VGF2P8AFFINEQB $0, expandAVX512_52_mat0<>(SB), Z0, Z0
  2337	VPERMB Z7, Z2, Z2
  2338	VGF2P8AFFINEQB $0, expandAVX512_52_mat1<>(SB), Z2, Z2
  2339	VPERMB Z7, Z3, Z3
  2340	VGF2P8AFFINEQB $0, expandAVX512_52_mat2<>(SB), Z3, Z3
  2341	VPERMB Z7, Z4, Z4
  2342	VGF2P8AFFINEQB $0, expandAVX512_52_mat3<>(SB), Z4, Z4
      	// Z1 = bytes selected from stages 0 and 1 (Z0, Z2) by the indices in Z1.
  2343	VPERMI2B Z2, Z0, Z1
      	// AX is overwritten from here on; the input pointer is no longer needed.
  2344	MOVQ $0x387f80ffffffffff, AX
  2345	KMOVQ AX, K1
      	// Z5 = bytes selected from stages 1 and 2 (Z2, Z3) where K1 is set,
      	// zero elsewhere.
  2346	VPERMI2B.Z Z3, Z2, K1, Z5
      	// This mask is the bitwise complement of the previous one.
  2347	MOVQ $0xc7807f0000000000, AX
  2348	KMOVQ AX, K1
      	// Z0 = bytes selected from stage 3 (Z4) by Z6 where K1 is set,
      	// zero elsewhere.
  2349	VPERMB.Z Z4, Z6, K1, Z0
      	// Z2 = Z5 | Z0: merge the two partial results, whose non-zero byte
      	// positions are disjoint because the masks are complementary.
  2350	VPORQ Z0, Z5, Z2
  2351	RET
  2352
  // expandAVX512_56_inShuf0: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_56 loads it into Z0 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX); the gathered
  // bytes are then transformed with expandAVX512_56_mat0.
  2353GLOBL expandAVX512_56_inShuf0<>(SB), RODATA, $0x40
  2354DATA  expandAVX512_56_inShuf0<>+0x00(SB)/8, $0x0100000000000000
  2355DATA  expandAVX512_56_inShuf0<>+0x08(SB)/8, $0x0100000000000000
  2356DATA  expandAVX512_56_inShuf0<>+0x10(SB)/8, $0xff00000000000000
  2357DATA  expandAVX512_56_inShuf0<>+0x18(SB)/8, $0xff00000000000000
  2358DATA  expandAVX512_56_inShuf0<>+0x20(SB)/8, $0xff00000000000000
  2359DATA  expandAVX512_56_inShuf0<>+0x28(SB)/8, $0xff00000000000000
  2360DATA  expandAVX512_56_inShuf0<>+0x30(SB)/8, $0xff00000000000000
  2361DATA  expandAVX512_56_inShuf0<>+0x38(SB)/8, $0xff00000000000000
  2362
  // expandAVX512_56_mat0: 0x40-byte read-only, file-local (<>) table.
  // expandAVX512_56 loads it into Z3 and uses it as the VGF2P8AFFINEQB matrix
  // for both the inShuf0 and the inShuf1 gathers.
  // Word k has every byte equal to 1<<k, so in 64-bit lane k each output bit
  // equals bit k of the corresponding gathered input byte.
  2363GLOBL expandAVX512_56_mat0<>(SB), RODATA, $0x40
  2364DATA  expandAVX512_56_mat0<>+0x00(SB)/8, $0x0101010101010101
  2365DATA  expandAVX512_56_mat0<>+0x08(SB)/8, $0x0202020202020202
  2366DATA  expandAVX512_56_mat0<>+0x10(SB)/8, $0x0404040404040404
  2367DATA  expandAVX512_56_mat0<>+0x18(SB)/8, $0x0808080808080808
  2368DATA  expandAVX512_56_mat0<>+0x20(SB)/8, $0x1010101010101010
  2369DATA  expandAVX512_56_mat0<>+0x28(SB)/8, $0x2020202020202020
  2370DATA  expandAVX512_56_mat0<>+0x30(SB)/8, $0x4040404040404040
  2371DATA  expandAVX512_56_mat0<>+0x38(SB)/8, $0x8080808080808080
  2372
  // expandAVX512_56_inShuf1: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_56 loads it into Z4 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX); the gathered
  // bytes are then transformed with expandAVX512_56_mat0 (there is no
  // separate mat1 table for this routine).
  2373GLOBL expandAVX512_56_inShuf1<>(SB), RODATA, $0x40
  2374DATA  expandAVX512_56_inShuf1<>+0x00(SB)/8, $0xffff010101010101
  2375DATA  expandAVX512_56_inShuf1<>+0x08(SB)/8, $0x0202010101010101
  2376DATA  expandAVX512_56_inShuf1<>+0x10(SB)/8, $0x0201010101010101
  2377DATA  expandAVX512_56_inShuf1<>+0x18(SB)/8, $0xff01010101010101
  2378DATA  expandAVX512_56_inShuf1<>+0x20(SB)/8, $0xff01010101010101
  2379DATA  expandAVX512_56_inShuf1<>+0x28(SB)/8, $0xff01010101010101
  2380DATA  expandAVX512_56_inShuf1<>+0x30(SB)/8, $0xff01010101010101
  2381DATA  expandAVX512_56_inShuf1<>+0x38(SB)/8, $0xff01010101010101
  2382
  // expandAVX512_56_inShuf2: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_56 loads it into Z5 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX); the gathered
  // bytes are then transformed with expandAVX512_56_mat2.
  // The upper five words are all 0xff and the matching words of
  // expandAVX512_56_mat2 are zero.
  2383GLOBL expandAVX512_56_inShuf2<>(SB), RODATA, $0x40
  2384DATA  expandAVX512_56_inShuf2<>+0x00(SB)/8, $0xff02020202020202
  2385DATA  expandAVX512_56_inShuf2<>+0x08(SB)/8, $0xffffff0202020202
  2386DATA  expandAVX512_56_inShuf2<>+0x10(SB)/8, $0xffffffffffffff02
  2387DATA  expandAVX512_56_inShuf2<>+0x18(SB)/8, $0xffffffffffffffff
  2388DATA  expandAVX512_56_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
  2389DATA  expandAVX512_56_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
  2390DATA  expandAVX512_56_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
  2391DATA  expandAVX512_56_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
  2392
  // expandAVX512_56_mat2: 0x40-byte read-only, file-local (<>) table.
  // expandAVX512_56 uses it as the matrix operand of VGF2P8AFFINEQB for the
  // bytes gathered with expandAVX512_56_inShuf2; each 8-byte word below is
  // the bit matrix for one 64-bit lane of that vector.
  // The upper five words are zero matrices, so with the $0 constant term
  // used by the caller those lanes produce all-zero bytes.
  2393GLOBL expandAVX512_56_mat2<>(SB), RODATA, $0x40
  2394DATA  expandAVX512_56_mat2<>+0x00(SB)/8, $0x0101010101010101
  2395DATA  expandAVX512_56_mat2<>+0x08(SB)/8, $0x0202020202020202
  2396DATA  expandAVX512_56_mat2<>+0x10(SB)/8, $0x0404040404040404
  2397DATA  expandAVX512_56_mat2<>+0x18(SB)/8, $0x0000000000000000
  2398DATA  expandAVX512_56_mat2<>+0x20(SB)/8, $0x0000000000000000
  2399DATA  expandAVX512_56_mat2<>+0x28(SB)/8, $0x0000000000000000
  2400DATA  expandAVX512_56_mat2<>+0x30(SB)/8, $0x0000000000000000
  2401DATA  expandAVX512_56_mat2<>+0x38(SB)/8, $0x0000000000000000
  2402
  // expandAVX512_56_outShufLo: 0x40-byte read-only table of byte indices.
  // expandAVX512_56 loads it into Z1 and uses it as the index operand of
  // VPERMI2B, selecting bytes from the stage-0 (Z0) and stage-1 (Z3) results;
  // the instruction overwrites Z1 with the selected bytes.
  // NOTE(review): unlike the inShuf/mat tables, this symbol is declared
  // without the <> file-local suffix — confirm this is intended by mkasm.go.
  2403GLOBL expandAVX512_56_outShufLo(SB), RODATA, $0x40
  2404DATA  expandAVX512_56_outShufLo+0x00(SB)/8, $0x0806050403020100
  2405DATA  expandAVX512_56_outShufLo+0x08(SB)/8, $0x11100e0d0c0b0a09
  2406DATA  expandAVX512_56_outShufLo+0x10(SB)/8, $0x1a19181615141312
  2407DATA  expandAVX512_56_outShufLo+0x18(SB)/8, $0x232221201e1d1c1b
  2408DATA  expandAVX512_56_outShufLo+0x20(SB)/8, $0x2c2b2a2928262524
  2409DATA  expandAVX512_56_outShufLo+0x28(SB)/8, $0x3534333231302e2d
  2410DATA  expandAVX512_56_outShufLo+0x30(SB)/8, $0x3e3d3c3b3a393836
  2411DATA  expandAVX512_56_outShufLo+0x38(SB)/8, $0x0f45444342414007
  2412
  // expandAVX512_56_outShufHi: 0x40-byte read-only table of byte indices.
  // expandAVX512_56 loads it into Z2 and uses it as the index operand of
  // VPERMI2B, selecting bytes from the stage-1 (Z3) and stage-2 (Z4) results;
  // the instruction overwrites Z2 with the selected bytes.
  // NOTE(review): declared without the <> file-local suffix used by the
  // inShuf/mat tables — confirm this is intended by mkasm.go.
  2413GLOBL expandAVX512_56_outShufHi(SB), RODATA, $0x40
  2414DATA  expandAVX512_56_outShufHi+0x00(SB)/8, $0x11100d0c0b0a0908
  2415DATA  expandAVX512_56_outShufHi+0x08(SB)/8, $0x1a19181615141312
  2416DATA  expandAVX512_56_outShufHi+0x10(SB)/8, $0x232221201e1d1c1b
  2417DATA  expandAVX512_56_outShufHi+0x18(SB)/8, $0x2c2b2a2928262524
  2418DATA  expandAVX512_56_outShufHi+0x20(SB)/8, $0x3534333231302e2d
  2419DATA  expandAVX512_56_outShufHi+0x28(SB)/8, $0x3e3d3c3b3a393836
  2420DATA  expandAVX512_56_outShufHi+0x30(SB)/8, $0x0e46454443424140
  2421DATA  expandAVX512_56_outShufHi+0x38(SB)/8, $0x50174c4b4a49480f
  2422
  // expandAVX512_56 reads 64 bytes from the address in AX and leaves two
  // 64-byte results in Z1 and Z2; it writes nothing to memory.
  // Its address is stored at ·gcExpandersAVX512+0xc0.
  //
  // Declared NOSPLIT with frame size 0 and argument size 0 ($0-0); the input
  // pointer arrives in AX and is not modified.
  // Clobbers: Z0-Z6.
  // NOTE(review): presumably the caller consumes Z1 as the low half and Z2 as
  // the high half of the expanded bitmap, and "56" is the expansion factor —
  // confirm against the caller and mkasm.go.
  2423TEXT expandAVX512_56<>(SB), NOSPLIT, $0-0
      	// Load index tables (Z0, Z4, Z5, Z1, Z2) and the shared matrix (Z3).
  2424	VMOVDQU64 expandAVX512_56_inShuf0<>(SB), Z0
  2425	VMOVDQU64 expandAVX512_56_mat0<>(SB), Z3
  2426	VMOVDQU64 expandAVX512_56_inShuf1<>(SB), Z4
  2427	VMOVDQU64 expandAVX512_56_inShuf2<>(SB), Z5
  2428	VMOVDQU64 expandAVX512_56_outShufLo(SB), Z1
  2429	VMOVDQU64 expandAVX512_56_outShufHi(SB), Z2
      	// Z6 = the 64 input bytes.
  2430	VMOVDQU64 (AX), Z6
      	// Stage 0: gather by inShuf0, transform with mat0 (Z3); result in Z0.
  2431	VPERMB Z6, Z0, Z0
  2432	VGF2P8AFFINEQB $0, Z3, Z0, Z0
      	// Stage 1: gather by inShuf1, transform with the same mat0. The result
      	// is written to Z3, replacing the matrix, which is not needed again.
  2433	VPERMB Z6, Z4, Z4
  2434	VGF2P8AFFINEQB $0, Z3, Z4, Z3
      	// Stage 2: gather by inShuf2 into Z4, transform with mat2 from memory.
  2435	VPERMB Z6, Z5, Z4
  2436	VGF2P8AFFINEQB $0, expandAVX512_56_mat2<>(SB), Z4, Z4
      	// Z1 = bytes selected from stages 0 and 1 (Z0, Z3) by the indices in Z1.
  2437	VPERMI2B Z3, Z0, Z1
      	// Z2 = bytes selected from stages 1 and 2 (Z3, Z4) by the indices in Z2.
  2438	VPERMI2B Z4, Z3, Z2
  2439	RET
  2440
  // expandAVX512_60_inShuf0: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_60 loads it into Z0 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX); the gathered
  // bytes are then transformed with expandAVX512_60_mat0.
  2441GLOBL expandAVX512_60_inShuf0<>(SB), RODATA, $0x40
  2442DATA  expandAVX512_60_inShuf0<>+0x00(SB)/8, $0x0100000000000000
  2443DATA  expandAVX512_60_inShuf0<>+0x08(SB)/8, $0xffffffffffffff00
  2444DATA  expandAVX512_60_inShuf0<>+0x10(SB)/8, $0xff00000000000000
  2445DATA  expandAVX512_60_inShuf0<>+0x18(SB)/8, $0xff00000000000000
  2446DATA  expandAVX512_60_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
  2447DATA  expandAVX512_60_inShuf0<>+0x28(SB)/8, $0xff00000000000000
  2448DATA  expandAVX512_60_inShuf0<>+0x30(SB)/8, $0xff00000000000000
  2449DATA  expandAVX512_60_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
  2450
  // expandAVX512_60_mat0: 0x40-byte read-only, file-local (<>) table.
  // expandAVX512_60 uses it as the matrix operand of VGF2P8AFFINEQB for the
  // bytes gathered with expandAVX512_60_inShuf0; each 8-byte word below is
  // the bit matrix for one 64-bit lane of that vector.
  2451GLOBL expandAVX512_60_mat0<>(SB), RODATA, $0x40
  2452DATA  expandAVX512_60_mat0<>+0x00(SB)/8, $0x0101010101010101
  2453DATA  expandAVX512_60_mat0<>+0x08(SB)/8, $0x0101010102020202
  2454DATA  expandAVX512_60_mat0<>+0x10(SB)/8, $0x0202020202020202
  2455DATA  expandAVX512_60_mat0<>+0x18(SB)/8, $0x0404040404040404
  2456DATA  expandAVX512_60_mat0<>+0x20(SB)/8, $0x0404040408080808
  2457DATA  expandAVX512_60_mat0<>+0x28(SB)/8, $0x0808080808080808
  2458DATA  expandAVX512_60_mat0<>+0x30(SB)/8, $0x1010101010101010
  2459DATA  expandAVX512_60_mat0<>+0x38(SB)/8, $0x1010101020202020
  2460
  // expandAVX512_60_inShuf1: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_60 loads it into Z2 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX); the gathered
  // bytes are then transformed with expandAVX512_60_mat1.
  2461GLOBL expandAVX512_60_inShuf1<>(SB), RODATA, $0x40
  2462DATA  expandAVX512_60_inShuf1<>+0x00(SB)/8, $0xff00000000000000
  2463DATA  expandAVX512_60_inShuf1<>+0x08(SB)/8, $0xff00000000000000
  2464DATA  expandAVX512_60_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
  2465DATA  expandAVX512_60_inShuf1<>+0x18(SB)/8, $0xff00000000000000
  2466DATA  expandAVX512_60_inShuf1<>+0x20(SB)/8, $0xffffffffff010101
  2467DATA  expandAVX512_60_inShuf1<>+0x28(SB)/8, $0x0202020202010101
  2468DATA  expandAVX512_60_inShuf1<>+0x30(SB)/8, $0xffffffffffff0201
  2469DATA  expandAVX512_60_inShuf1<>+0x38(SB)/8, $0xff01010101010101
  2470
  // expandAVX512_60_mat1: 0x40-byte read-only, file-local (<>) table.
  // expandAVX512_60 uses it as the matrix operand of VGF2P8AFFINEQB for the
  // bytes gathered with expandAVX512_60_inShuf1; each 8-byte word below is
  // the bit matrix for one 64-bit lane of that vector.
  2471GLOBL expandAVX512_60_mat1<>(SB), RODATA, $0x40
  2472DATA  expandAVX512_60_mat1<>+0x00(SB)/8, $0x2020202020202020
  2473DATA  expandAVX512_60_mat1<>+0x08(SB)/8, $0x4040404040404040
  2474DATA  expandAVX512_60_mat1<>+0x10(SB)/8, $0x4040404080808080
  2475DATA  expandAVX512_60_mat1<>+0x18(SB)/8, $0x8080808080808080
  2476DATA  expandAVX512_60_mat1<>+0x20(SB)/8, $0x0101010101010101
  2477DATA  expandAVX512_60_mat1<>+0x28(SB)/8, $0x0101010101010101
  2478DATA  expandAVX512_60_mat1<>+0x30(SB)/8, $0x0101010102020202
  2479DATA  expandAVX512_60_mat1<>+0x38(SB)/8, $0x0202020202020202
  2480
  // expandAVX512_60_inShuf2: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_60 loads it into Z3 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX); the gathered
  // bytes are then transformed with expandAVX512_60_mat2.
  2481GLOBL expandAVX512_60_inShuf2<>(SB), RODATA, $0x40
  2482DATA  expandAVX512_60_inShuf2<>+0x00(SB)/8, $0xff01010101010101
  2483DATA  expandAVX512_60_inShuf2<>+0x08(SB)/8, $0xffffffffffffff01
  2484DATA  expandAVX512_60_inShuf2<>+0x10(SB)/8, $0xff01010101010101
  2485DATA  expandAVX512_60_inShuf2<>+0x18(SB)/8, $0xff01010101010101
  2486DATA  expandAVX512_60_inShuf2<>+0x20(SB)/8, $0xffffffffffffff01
  2487DATA  expandAVX512_60_inShuf2<>+0x28(SB)/8, $0xff01010101010101
  2488DATA  expandAVX512_60_inShuf2<>+0x30(SB)/8, $0xff01010101010101
  2489DATA  expandAVX512_60_inShuf2<>+0x38(SB)/8, $0xffffffffffffff01
  2490
  // expandAVX512_60_mat2: 0x40-byte read-only, file-local (<>) table.
  // expandAVX512_60 uses it as the matrix operand of VGF2P8AFFINEQB for the
  // bytes gathered with expandAVX512_60_inShuf2; each 8-byte word below is
  // the bit matrix for one 64-bit lane of that vector.
  2491GLOBL expandAVX512_60_mat2<>(SB), RODATA, $0x40
  2492DATA  expandAVX512_60_mat2<>+0x00(SB)/8, $0x0404040404040404
  2493DATA  expandAVX512_60_mat2<>+0x08(SB)/8, $0x0404040408080808
  2494DATA  expandAVX512_60_mat2<>+0x10(SB)/8, $0x0808080808080808
  2495DATA  expandAVX512_60_mat2<>+0x18(SB)/8, $0x1010101010101010
  2496DATA  expandAVX512_60_mat2<>+0x20(SB)/8, $0x1010101020202020
  2497DATA  expandAVX512_60_mat2<>+0x28(SB)/8, $0x2020202020202020
  2498DATA  expandAVX512_60_mat2<>+0x30(SB)/8, $0x4040404040404040
  2499DATA  expandAVX512_60_mat2<>+0x38(SB)/8, $0x4040404080808080
  2500
  // expandAVX512_60_inShuf3: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_60 loads it into Z4 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX); the gathered
  // bytes are then transformed with expandAVX512_60_mat3.
  // The upper six words are all 0xff and the matching words of
  // expandAVX512_60_mat3 are zero.
  2501GLOBL expandAVX512_60_inShuf3<>(SB), RODATA, $0x40
  2502DATA  expandAVX512_60_inShuf3<>+0x00(SB)/8, $0xff01010101010101
  2503DATA  expandAVX512_60_inShuf3<>+0x08(SB)/8, $0xffffffffffff0202
  2504DATA  expandAVX512_60_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
  2505DATA  expandAVX512_60_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
  2506DATA  expandAVX512_60_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  2507DATA  expandAVX512_60_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  2508DATA  expandAVX512_60_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  2509DATA  expandAVX512_60_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  2510
  // expandAVX512_60_mat3: 0x40-byte read-only, file-local (<>) table.
  // expandAVX512_60 uses it as the matrix operand of VGF2P8AFFINEQB for the
  // bytes gathered with expandAVX512_60_inShuf3; each 8-byte word below is
  // the bit matrix for one 64-bit lane of that vector.
  // The upper six words are zero matrices, so with the $0 constant term
  // used by the caller those lanes produce all-zero bytes.
  2511GLOBL expandAVX512_60_mat3<>(SB), RODATA, $0x40
  2512DATA  expandAVX512_60_mat3<>+0x00(SB)/8, $0x8080808080808080
  2513DATA  expandAVX512_60_mat3<>+0x08(SB)/8, $0x0101010101010101
  2514DATA  expandAVX512_60_mat3<>+0x10(SB)/8, $0x0000000000000000
  2515DATA  expandAVX512_60_mat3<>+0x18(SB)/8, $0x0000000000000000
  2516DATA  expandAVX512_60_mat3<>+0x20(SB)/8, $0x0000000000000000
  2517DATA  expandAVX512_60_mat3<>+0x28(SB)/8, $0x0000000000000000
  2518DATA  expandAVX512_60_mat3<>+0x30(SB)/8, $0x0000000000000000
  2519DATA  expandAVX512_60_mat3<>+0x38(SB)/8, $0x0000000000000000
  2520
  // expandAVX512_60_outShufLo: 0x40-byte read-only table of byte indices.
  // expandAVX512_60 loads it into Z1 and uses it as the index operand of
  // VPERMI2B, selecting bytes from the stage-0 (Z0) and stage-1 (Z2) results;
  // the instruction overwrites Z1 with the selected bytes.
  // NOTE(review): unlike the inShuf/mat tables, this symbol is declared
  // without the <> file-local suffix — confirm this is intended by mkasm.go.
  2521GLOBL expandAVX512_60_outShufLo(SB), RODATA, $0x40
  2522DATA  expandAVX512_60_outShufLo+0x00(SB)/8, $0x0806050403020100
  2523DATA  expandAVX512_60_outShufLo+0x08(SB)/8, $0x1816151413121110
  2524DATA  expandAVX512_60_outShufLo+0x10(SB)/8, $0x28201e1d1c1b1a19
  2525DATA  expandAVX512_60_outShufLo+0x18(SB)/8, $0x31302e2d2c2b2a29
  2526DATA  expandAVX512_60_outShufLo+0x20(SB)/8, $0x4140383635343332
  2527DATA  expandAVX512_60_outShufLo+0x28(SB)/8, $0x4a49484645444342
  2528DATA  expandAVX512_60_outShufLo+0x30(SB)/8, $0x5a5958504e4d4c4b
  2529DATA  expandAVX512_60_outShufLo+0x38(SB)/8, $0x626160075e5d5c5b
  2530
  // expandAVX512_60_outShufHi0: 0x40-byte read-only table of byte indices.
  // expandAVX512_60 loads it into Z5 and uses it as the index operand of a
  // K1-masked, zeroing VPERMI2B over the stage-1 (Z2) and stage-2 (Z3)
  // results. Bytes whose K1 bit is clear are zeroed, whatever index is stored.
  // NOTE(review): declared without the <> file-local suffix used by the
  // inShuf/mat tables — confirm this is intended by mkasm.go.
  2531GLOBL expandAVX512_60_outShufHi0(SB), RODATA, $0x40
  2532DATA  expandAVX512_60_outShufHi0+0x00(SB)/8, $0x3b3a3938302a2928
  2533DATA  expandAVX512_60_outShufHi0+0x08(SB)/8, $0x44434241403e3d3c
  2534DATA  expandAVX512_60_outShufHi0+0x10(SB)/8, $0x5453525150484645
  2535DATA  expandAVX512_60_outShufHi0+0x18(SB)/8, $0x5d5c5b5a59585655
  2536DATA  expandAVX512_60_outShufHi0+0x20(SB)/8, $0x6d6c6b6a6968605e
  2537DATA  expandAVX512_60_outShufHi0+0x28(SB)/8, $0x767574737271706e
  2538DATA  expandAVX512_60_outShufHi0+0x30(SB)/8, $0xffffffffffffff78
  2539DATA  expandAVX512_60_outShufHi0+0x38(SB)/8, $0x31ffff2f2e2d2c2b
  2540
  // expandAVX512_60_outShufHi1: 0x40-byte read-only table of byte indices.
  // expandAVX512_60 loads it into Z6 and uses it as the index operand of a
  // K1-masked, zeroing VPERMB over the stage-3 result (Z4). The mask used
  // there is the complement of the one used with expandAVX512_60_outShufHi0.
  // NOTE(review): declared without the <> file-local suffix used by the
  // inShuf/mat tables — confirm this is intended by mkasm.go.
  2541GLOBL expandAVX512_60_outShufHi1(SB), RODATA, $0x40
  2542DATA  expandAVX512_60_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  2543DATA  expandAVX512_60_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  2544DATA  expandAVX512_60_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  2545DATA  expandAVX512_60_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  2546DATA  expandAVX512_60_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  2547DATA  expandAVX512_60_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  2548DATA  expandAVX512_60_outShufHi1+0x30(SB)/8, $0x06050403020100ff
  2549DATA  expandAVX512_60_outShufHi1+0x38(SB)/8, $0xff0908ffffffffff
  2550
  // expandAVX512_60 reads 64 bytes from the address in AX and leaves two
  // 64-byte results in Z1 and Z2; it writes nothing to memory.
  // Its address is stored at ·gcExpandersAVX512+0xc8.
  //
  // Declared NOSPLIT with frame size 0 and argument size 0 ($0-0); the input
  // pointer arrives in AX.
  // Clobbers: AX (reused as scratch for the mask constants), K1, Z0-Z7.
  // NOTE(review): presumably the caller consumes Z1 as the low half and Z2 as
  // the high half of the expanded bitmap, and "60" is the expansion factor —
  // confirm against the caller and mkasm.go.
  2551TEXT expandAVX512_60<>(SB), NOSPLIT, $0-0
      	// Load the four input-shuffle index tables (Z0, Z2, Z3, Z4) and the
      	// three output-shuffle index tables (Z1, Z5, Z6).
  2552	VMOVDQU64 expandAVX512_60_inShuf0<>(SB), Z0
  2553	VMOVDQU64 expandAVX512_60_inShuf1<>(SB), Z2
  2554	VMOVDQU64 expandAVX512_60_inShuf2<>(SB), Z3
  2555	VMOVDQU64 expandAVX512_60_inShuf3<>(SB), Z4
  2556	VMOVDQU64 expandAVX512_60_outShufLo(SB), Z1
  2557	VMOVDQU64 expandAVX512_60_outShufHi0(SB), Z5
  2558	VMOVDQU64 expandAVX512_60_outShufHi1(SB), Z6
      	// Z7 = the 64 input bytes.
  2559	VMOVDQU64 (AX), Z7
      	// Stages 0-3: gather input bytes by index (VPERMB), then apply the
      	// per-64-bit-lane bit matrix with a zero constant term (VGF2P8AFFINEQB $0).
  2560	VPERMB Z7, Z0, Z0
  2561	VGF2P8AFFINEQB $0, expandAVX512_60_mat0<>(SB), Z0, Z0
  2562	VPERMB Z7, Z2, Z2
  2563	VGF2P8AFFINEQB $0, expandAVX512_60_mat1<>(SB), Z2, Z2
  2564	VPERMB Z7, Z3, Z3
  2565	VGF2P8AFFINEQB $0, expandAVX512_60_mat2<>(SB), Z3, Z3
  2566	VPERMB Z7, Z4, Z4
  2567	VGF2P8AFFINEQB $0, expandAVX512_60_mat3<>(SB), Z4, Z4
      	// Z1 = bytes selected from stages 0 and 1 (Z0, Z2) by the indices in Z1.
  2568	VPERMI2B Z2, Z0, Z1
      	// AX is overwritten from here on; the input pointer is no longer needed.
  2569	MOVQ $0x9f01ffffffffffff, AX
  2570	KMOVQ AX, K1
      	// Z5 = bytes selected from stages 1 and 2 (Z2, Z3) where K1 is set,
      	// zero elsewhere.
  2571	VPERMI2B.Z Z3, Z2, K1, Z5
      	// This mask is the bitwise complement of the previous one.
  2572	MOVQ $0x60fe000000000000, AX
  2573	KMOVQ AX, K1
      	// Z0 = bytes selected from stage 3 (Z4) by Z6 where K1 is set,
      	// zero elsewhere.
  2574	VPERMB.Z Z4, Z6, K1, Z0
      	// Z2 = Z5 | Z0: merge the two partial results, whose non-zero byte
      	// positions are disjoint because the masks are complementary.
  2575	VPORQ Z0, Z5, Z2
  2576	RET
  2577
  // expandAVX512_64_inShuf0: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_64 loads it into Z0 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX).
  // Every index is 0, so the gather copies input byte 0 into all 64 lanes.
  2578GLOBL expandAVX512_64_inShuf0<>(SB), RODATA, $0x40
  2579DATA  expandAVX512_64_inShuf0<>+0x00(SB)/8, $0x0000000000000000
  2580DATA  expandAVX512_64_inShuf0<>+0x08(SB)/8, $0x0000000000000000
  2581DATA  expandAVX512_64_inShuf0<>+0x10(SB)/8, $0x0000000000000000
  2582DATA  expandAVX512_64_inShuf0<>+0x18(SB)/8, $0x0000000000000000
  2583DATA  expandAVX512_64_inShuf0<>+0x20(SB)/8, $0x0000000000000000
  2584DATA  expandAVX512_64_inShuf0<>+0x28(SB)/8, $0x0000000000000000
  2585DATA  expandAVX512_64_inShuf0<>+0x30(SB)/8, $0x0000000000000000
  2586DATA  expandAVX512_64_inShuf0<>+0x38(SB)/8, $0x0000000000000000
  2587
  // expandAVX512_64_mat0: 0x40-byte read-only, file-local (<>) table.
  // expandAVX512_64 loads it into Z1 and uses it as the VGF2P8AFFINEQB matrix
  // for both the inShuf0 and the inShuf1 gathers.
  // Word k has every byte equal to 1<<k, so in 64-bit lane k each output bit
  // equals bit k of the corresponding gathered input byte.
  2588GLOBL expandAVX512_64_mat0<>(SB), RODATA, $0x40
  2589DATA  expandAVX512_64_mat0<>+0x00(SB)/8, $0x0101010101010101
  2590DATA  expandAVX512_64_mat0<>+0x08(SB)/8, $0x0202020202020202
  2591DATA  expandAVX512_64_mat0<>+0x10(SB)/8, $0x0404040404040404
  2592DATA  expandAVX512_64_mat0<>+0x18(SB)/8, $0x0808080808080808
  2593DATA  expandAVX512_64_mat0<>+0x20(SB)/8, $0x1010101010101010
  2594DATA  expandAVX512_64_mat0<>+0x28(SB)/8, $0x2020202020202020
  2595DATA  expandAVX512_64_mat0<>+0x30(SB)/8, $0x4040404040404040
  2596DATA  expandAVX512_64_mat0<>+0x38(SB)/8, $0x8080808080808080
  2597
  // expandAVX512_64_inShuf1: 0x40-byte read-only, file-local (<>) table of
  // byte indices. expandAVX512_64 loads it into Z2 and uses it as the index
  // operand of VPERMB over the 64 input bytes read from (AX).
  // Every index is 1, so the gather copies input byte 1 into all 64 lanes.
  2598GLOBL expandAVX512_64_inShuf1<>(SB), RODATA, $0x40
  2599DATA  expandAVX512_64_inShuf1<>+0x00(SB)/8, $0x0101010101010101
  2600DATA  expandAVX512_64_inShuf1<>+0x08(SB)/8, $0x0101010101010101
  2601DATA  expandAVX512_64_inShuf1<>+0x10(SB)/8, $0x0101010101010101
  2602DATA  expandAVX512_64_inShuf1<>+0x18(SB)/8, $0x0101010101010101
  2603DATA  expandAVX512_64_inShuf1<>+0x20(SB)/8, $0x0101010101010101
  2604DATA  expandAVX512_64_inShuf1<>+0x28(SB)/8, $0x0101010101010101
  2605DATA  expandAVX512_64_inShuf1<>+0x30(SB)/8, $0x0101010101010101
  2606DATA  expandAVX512_64_inShuf1<>+0x38(SB)/8, $0x0101010101010101
  2607
  // expandAVX512_64_outShufLo: 0x40-byte read-only table of byte indices.
  // expandAVX512_64 loads it into Z3 and uses it as the VPERMB index operand
  // for both output vectors. The bytes are 0x00..0x3f in ascending order,
  // i.e. the identity permutation.
  // NOTE(review): unlike the inShuf/mat tables, this symbol is declared
  // without the <> file-local suffix — confirm this is intended by mkasm.go.
  2608GLOBL expandAVX512_64_outShufLo(SB), RODATA, $0x40
  2609DATA  expandAVX512_64_outShufLo+0x00(SB)/8, $0x0706050403020100
  2610DATA  expandAVX512_64_outShufLo+0x08(SB)/8, $0x0f0e0d0c0b0a0908
  2611DATA  expandAVX512_64_outShufLo+0x10(SB)/8, $0x1716151413121110
  2612DATA  expandAVX512_64_outShufLo+0x18(SB)/8, $0x1f1e1d1c1b1a1918
  2613DATA  expandAVX512_64_outShufLo+0x20(SB)/8, $0x2726252423222120
  2614DATA  expandAVX512_64_outShufLo+0x28(SB)/8, $0x2f2e2d2c2b2a2928
  2615DATA  expandAVX512_64_outShufLo+0x30(SB)/8, $0x3736353433323130
  2616DATA  expandAVX512_64_outShufLo+0x38(SB)/8, $0x3f3e3d3c3b3a3938
  2617
  // expandAVX512_64 reads 64 bytes from the address in AX and leaves two
  // 64-byte results in Z1 and Z2; it writes nothing to memory.
  // Its address is stored at ·gcExpandersAVX512+0xd0.
  //
  // Given the table contents, Z1 is derived only from input byte 0 and Z2
  // only from input byte 1: each input bit k fills the eight bytes of 64-bit
  // lane k with all ones or all zeros.
  //
  // Declared NOSPLIT with frame size 0 and argument size 0 ($0-0); the input
  // pointer arrives in AX and is not modified.
  // Clobbers: Z0-Z4.
  // NOTE(review): presumably the caller consumes Z1 as the low half and Z2 as
  // the high half of the expanded bitmap — confirm against the caller.
  2618TEXT expandAVX512_64<>(SB), NOSPLIT, $0-0
      	// Load the index tables (Z0, Z2, Z3) and the shared matrix (Z1).
  2619	VMOVDQU64 expandAVX512_64_inShuf0<>(SB), Z0
  2620	VMOVDQU64 expandAVX512_64_mat0<>(SB), Z1
  2621	VMOVDQU64 expandAVX512_64_inShuf1<>(SB), Z2
  2622	VMOVDQU64 expandAVX512_64_outShufLo(SB), Z3
      	// Z4 = the 64 input bytes.
  2623	VMOVDQU64 (AX), Z4
      	// Stage 0: copy input byte 0 into every lane, then apply mat0.
  2624	VPERMB Z4, Z0, Z0
  2625	VGF2P8AFFINEQB $0, Z1, Z0, Z0
      	// Stage 1: copy input byte 1 into every lane, then apply mat0.
  2626	VPERMB Z4, Z2, Z2
  2627	VGF2P8AFFINEQB $0, Z1, Z2, Z2
      	// Output shuffles through Z3 (identity indices): Z1 = stage 0, which
      	// overwrites the matrix, and Z2 = stage 1.
  2628	VPERMB Z0, Z3, Z1
  2629	VPERMB Z2, Z3, Z2
  2630	RET
  2631

View as plain text