...
Run Format

Text file src/internal/chacha8rand/chacha8_loong64.s

Documentation: internal/chacha8rand

     1// Copyright 2025 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8DATA	·chachaConst+0x00(SB)/4, $0x61707865	// ChaCha constant word 0; its bytes in little-endian order spell "expa"
     9DATA	·chachaConst+0x04(SB)/4, $0x3320646e	// constant word 1: "nd 3"
    10DATA	·chachaConst+0x08(SB)/4, $0x79622d32	// constant word 2: "2-by"
    11DATA	·chachaConst+0x0c(SB)/4, $0x6b206574	// constant word 3: "te k"
    12GLOBL	·chachaConst(SB), NOPTR|RODATA, $32	// read-only, pointer-free; declared as 32 bytes, the DATA lines above initialize the first 16
    13
    14DATA	·chachaIncRot+0x00(SB)/4, $0x00000000	// lane 0 increment; block loads these four words into V30 and adds them to the counter
    15DATA	·chachaIncRot+0x04(SB)/4, $0x00000001	// lane 1 increment
    16DATA	·chachaIncRot+0x08(SB)/4, $0x00000002	// lane 2 increment
    17DATA	·chachaIncRot+0x0c(SB)/4, $0x00000003	// lane 3 increment
    18GLOBL	·chachaIncRot(SB), NOPTR|RODATA, $32	// read-only, pointer-free; declared as 32 bytes, the DATA lines above initialize the first 16
    19
    20// QR is the ChaCha8 quarter-round on a, b, c, and d.
      // a, b, c and d are vector registers; all four are updated in place and
      // no other register is written. The adds and rotates work on 32-bit
      // lanes, so one expansion applies the quarter-round to every lane at once.
      // The destination is the last operand of each instruction, giving:
      //   a += b; d ^= a; d = rotr(d, 16); c += d; b ^= c; b = rotr(b, 20);
      //   a += b; d ^= a; d = rotr(d, 24); c += d; b ^= c; b = rotr(b, 25)
      // Rotating a 32-bit word right by 16, 20, 24 and 25 bits is the same as
      // rotating it left by 16, 12, 8 and 7 bits.
    21#define QR(a, b, c, d) \
    22	VADDW	a, b, a; \
    23	VXORV	d, a, d; \
    24	VROTRW	$16, d; \
    25	VADDW	c, d, c; \
    26	VXORV	b, c, b; \
    27	VROTRW	$20, b; \
    28	VADDW	a, b, a; \
    29	VXORV	d, a, d; \
    30	VROTRW	$24, d; \
    31	VADDW	c, d, c; \
    32	VXORV	b, c, b; \
    33	VROTRW	$25, b
    34
    35
    36// func block(seed *[8]uint32, blocks *[4][16]uint32, counter uint32)
      //
      // block runs 4 loop iterations of a column pass plus a diagonal pass
      // (8 rounds) over the 16 vector registers V0-V15, then stores all 16 of
      // them, 16 bytes each, to blocks (256 bytes in total).
      // Each vector carries one state word in four 32-bit lanes. The constant
      // and seed words are loaded with VLDREPL.W (one word replicated to every
      // lane), while the lanes of V12 hold counter+0 .. counter+3, so four
      // blocks with consecutive counters are computed side by side.
      // When the LSX feature flag is zero the call is forwarded to block_generic.
      // Registers written on the LSX path: R7, R10, R11, V0-V15, V20-V27, V30.
    37TEXT ·block<ABIInternal>(SB), NOSPLIT, $0
    38	// seed in R4
    39	// blocks in R5
    40	// counter in R6
    41
    42	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R7	// R7 = HasLSX flag byte from internal/cpu
    43	BNE	R7, lsx_chacha8	// flag is non-zero: take the LSX path below
    44	JMP	·block_generic<ABIInternal>(SB)	// otherwise tail-jump; R4-R6 still hold the untouched arguments
    45	RET	// NOTE(review): not reachable, the JMP above is unconditional
    46
    47lsx_chacha8:
    48	MOVV	$·chachaConst(SB), R10	// R10 = address of the four constant words
    49	MOVV	$·chachaIncRot(SB), R11	// R11 = address of the lane increments 0, 1, 2, 3
    50
    51	// load constants into V0-V3, one replicated word per vector.
      	// The loads are emitted as raw WORD encodings; the comment above each
      	// WORD names the intended instruction.
    52	// VLDREPL.W  $0, R10, V0
    53	WORD	$0x30200140
    54	// VLDREPL.W  $1, R10, V1
    55	WORD	$0x30200541
    56	// VLDREPL.W  $2, R10, V2
    57	WORD	$0x30200942
    58	// VLDREPL.W  $3, R10, V3
    59	WORD	$0x30200d43
    60
    61	// load the four 32-bit lane increments from chachaIncRot; they are added to counter below
    62	VMOVQ	(R11), V30
    63
    64	// load seed: seed words 0-7 go to V4-V11, one replicated word per vector
    65	// VLDREPL.W  $0, R4, V4
    66	WORD	$0x30200084
    67	// VLDREPL.W  $1, R4, V5
    68	WORD	$0x30200485
    69	// VLDREPL.W  $2, R4, V6
    70	WORD	$0x30200886
    71	// VLDREPL.W  $3, R4, V7
    72	WORD	$0x30200c87
    73	// VLDREPL.W  $4, R4, V8
    74	WORD	$0x30201088
    75	// VLDREPL.W  $5, R4, V9
    76	WORD	$0x30201489
    77	// VLDREPL.W  $6, R4, V10
    78	WORD	$0x3020188a
    79	// VLDREPL.W  $7, R4, V11
    80	WORD	$0x30201c8b
    81
    82	// load counter and update counter
    83	VMOVQ	R6, V12.W4	// copy counter into all four 32-bit lanes of V12
    84	VADDW	V12, V30, V12	// V12 lanes = counter+0, counter+1, counter+2, counter+3
    85
    86	// zeros for remaining three matrix entries
    87	VXORV	V13, V13, V13
    88	VXORV	V14, V14, V14
    89	VXORV	V15, V15, V15
    90
    91	// save seed state for adding back later
      	// (V13 is all zeros at this point, so OR-ing with it is a plain copy)
    92	VORV	V4, V13, V20
    93	VORV	V5, V13, V21
    94	VORV	V6, V13, V22
    95	VORV	V7, V13, V23
    96	VORV	V8, V13, V24
    97	VORV	V9, V13, V25
    98	VORV	V10, V13, V26
    99	VORV	V11, V13, V27
   100
   101	// 4 iterations. Each iteration is 8 quarter-rounds.
   102	MOVV	$4, R7
   103loop:
      	// column pass: state words (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15)
   104	QR(V0, V4, V8, V12)
   105	QR(V1, V5, V9, V13)
   106	QR(V2, V6, V10, V14)
   107	QR(V3, V7, V11, V15)
   108
      	// diagonal pass: state words (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14)
   109	QR(V0, V5, V10, V15)
   110	QR(V1, V6, V11, V12)
   111	QR(V2, V7, V8, V13)
   112	QR(V3, V4, V9, V14)
   113
   114	SUBV	$1, R7
   115	BNE	R7, R0, loop
   116
   117	// add seed back
      	// Only V4-V11 get their saved input (V20-V27) added; V0-V3 and V12-V15
      	// are stored exactly as the rounds left them.
   118	VADDW	V4, V20, V4
   119	VADDW	V5, V21, V5
   120	VADDW	V6, V22, V6
   121	VADDW	V7, V23, V7
   122	VADDW	V8, V24, V8
   123	VADDW	V9, V25, V9
   124	VADDW	V10, V26, V10
   125	VADDW	V11, V27, V11
   126
   127	// store blocks back to output buffer
      	// Vi is written at byte offset 16*i, so state word i of all four lanes
      	// is contiguous in memory.
   128	VMOVQ	V0, (R5)
   129	VMOVQ	V1, 16(R5)
   130	VMOVQ	V2, 32(R5)
   131	VMOVQ	V3, 48(R5)
   132	VMOVQ	V4, 64(R5)
   133	VMOVQ	V5, 80(R5)
   134	VMOVQ	V6, 96(R5)
   135	VMOVQ	V7, 112(R5)
   136	VMOVQ	V8, 128(R5)
   137	VMOVQ	V9, 144(R5)
   138	VMOVQ	V10, 160(R5)
   139	VMOVQ	V11, 176(R5)
   140	VMOVQ	V12, 192(R5)
   141	VMOVQ	V13, 208(R5)
   142	VMOVQ	V14, 224(R5)
   143	VMOVQ	V15, 240(R5)
   144
   145	RET

View as plain text