...
Run Format

Text file src/internal/chacha8rand/chacha8_riscv64.s

Documentation: internal/chacha8rand

     1// Copyright 2025 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "asm_riscv64.h"
     6#include "go_asm.h"
     7#include "textflag.h"
     8
     9// TODO(mzh): use Zvkb if possible
    10
      // QR performs one ChaCha quarter round on the four state words held
      // in vector registers A, B, C, D.  It operates lane-wise, so the four
      // 32-bit lanes advance four independent ChaCha8 blocks at once.
      //
      // The base "V" extension has no 32-bit rotate, so each rotation is
      // synthesized from a shift-left/shift-right pair into a scratch
      // register followed by an XOR merge:
      //   rotl16: SLL $16 / SRL $16   (scratch V28)
      //   rotl12: SLL $12 / SRL $20   (scratch V29)
      //   rotl8:  SLL $8  / SRL $24   (scratch V30)
      //   rotl7:  SLL $7  / SRL $25   (scratch V31)
      // V28-V31 are clobbered on every use of QR.
      //
      // All commentary is kept outside the macro body: a // comment before
      // a backslash line continuation would swallow the continuation.
    11#define QR(A, B, C, D) \
    12	VADDVV	A, B, A \
    13	VXORVV	D, A, D \
    14	VSLLVI	$16, D, V28 \
    15	VSRLVI	$16, D, D \
    16	VXORVV	V28, D, D \
    17	VADDVV	D, C, C  \
    18	VXORVV	C, B, B \
    19	VSLLVI	$12, B, V29 \
    20	VSRLVI	$20, B, B \
    21	VXORVV	V29, B, B \
    22	VADDVV	B, A, A  \
    23	VXORVV	A, D, D \
    24	VSLLVI	$8, D, V30 \
    25	VSRLVI	$24, D, D \
    26	VXORVV	V30, D, D \
    27	VADDVV	D, C, C  \
    28	VXORVV	C, B, B \
    29	VSLLVI	$7, B, V31 \
    30	VSRLVI	$25, B, B \
    31	VXORVV	V31, B, B
    32
    33// block runs four ChaCha8 block transformations using four elements in each V register.
    34// func block(seed *[8]uint32, blocks *[16][4]uint32, counter uint32)
    35TEXT ·block<ABIInternal>(SB), NOSPLIT, $0
    36	// seed in X10
    37	// blocks in X11
    38	// counter in X12
    39
      	// State layout: each of V0-V15 holds one of the 16 ChaCha state
      	// words, with the 4 vector lanes carrying 4 independent blocks
      	// (counters counter, counter+1, counter+2, counter+3).
    40#ifndef hasV
      	// No compile-time guarantee of the vector extension: probe the CPU
      	// feature bit and fall back to the portable Go implementation via a
      	// tail call when V is unavailable.
    41	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X13
    42	BNEZ	X13, vector_chacha8
    43	JMP	·block_generic<ABIInternal>(SB)
    44#endif
    45
    46vector_chacha8:
    47	// At least VLEN >= 128
      	// vl = 4 elements, E32 = 32-bit elements, LMUL = 1, tail/mask
      	// agnostic; result vl discarded (X0).
    48	VSETIVLI	$4, E32, M1, TA, MA, X0
    49	// Load initial constants into top row.
      	// Both segment loads use stride register X0 (zero), so every lane
      	// re-reads the same consecutive words: the 4 constants are
      	// broadcast into V0-V3 and the 8 seed words into V4-V11.
    50	MOV $·chachaConst(SB), X14
    51	VLSSEG4E32V	(X14), X0, V0 // V0, V1, V2, V3 = const row
    52	VLSSEG8E32V	(X10), X0, V4 // V4 ... V11, seed
    53	VIDV	V12 // V12 = {0, 1, 2, 3}, the per-lane block offset
    54	VADDVX	X12, V12, V12		// counter
    55
    56	// Clear all nonces.
    57	VXORVV	V13, V13, V13
    58	VXORVV	V14, V14, V14
    59	VXORVV	V15, V15, V15
    60
    61	// Copy initial state.
      	// Whole-register group moves (4 registers each): save the seed rows
      	// V4-V11 into V20-V27 for the feed-forward addition after the
      	// rounds.
    62	VMV4RV V4, V20
    63	VMV4RV V8, V24
    64
      	// 4 iterations x (column round + diagonal round) = 8 ChaCha rounds.
    65	MOV	$4, X15
    66	PCALIGN	$16
loop:
      	// Column round: quarter rounds down the four columns of the 4x4
      	// state matrix.
    68	QR(V0, V4, V8, V12)
    69	QR(V1, V5, V9, V13)
    70	QR(V2, V6, V10, V14)
    71	QR(V3, V7, V11, V15)
    72
      	// Diagonal round: quarter rounds along the four diagonals.
    73	QR(V0, V5, V10, V15)
    74	QR(V1, V6, V11, V12)
    75	QR(V2, V7, V8, V13)
    76	QR(V3, V4, V9, V14)
    77
    78	SUB	$1, X15
    79	BNEZ	X15, loop
    80
      	// Feed-forward: only the seed rows (V4-V11) get the saved input
      	// added back; the constant row (V0-V3) and counter/nonce rows
      	// (V12-V15) are stored without it.
      	// NOTE(review): this looks like chacha8rand's deliberate deviation
      	// from standard ChaCha8 -- confirm it matches block_generic.
    81	VADDVV	V20, V4, V4
    82	VADDVV	V21, V5, V5
    83	VADDVV	V22, V6, V6
    84	VADDVV	V23, V7, V7
    85	VADDVV	V24, V8, V8
    86	VADDVV	V25, V9, V9
    87	VADDVV	V26, V10, V10
    88	VADDVV	V27, V11, V11
    89
      	// Store the 16 state rows in order; each VSE32V writes one row's
      	// 4 lanes (16 bytes), filling blocks[16][4]uint32 row by row
      	// (256 bytes total).
    90	VSE32V	V0, (X11); ADD $16, X11;
    91	VSE32V	V1, (X11); ADD $16, X11;
    92	VSE32V	V2, (X11); ADD $16, X11;
    93	VSE32V	V3, (X11); ADD $16, X11;
    94	VSE32V	V4, (X11); ADD $16, X11;
    95	VSE32V	V5, (X11); ADD $16, X11;
    96	VSE32V	V6, (X11); ADD $16, X11;
    97	VSE32V	V7, (X11); ADD $16, X11;
    98	VSE32V	V8, (X11); ADD $16, X11;
    99	VSE32V	V9, (X11); ADD $16, X11;
   100	VSE32V	V10, (X11); ADD $16, X11;
   101	VSE32V	V11, (X11); ADD $16, X11;
   102	VSE32V	V12, (X11); ADD $16, X11;
   103	VSE32V	V13, (X11); ADD $16, X11;
   104	VSE32V	V14, (X11); ADD $16, X11;
   105	VSE32V	V15, (X11); ADD $16, X11;
   106
   107	RET
   108
      // chachaConst is the ChaCha constant row: the little-endian ASCII of
      // "expand 32-byte k" ("expa", "nd 3", "2-by", "te k").
      // Only the first 16 bytes are initialized; the remainder of the $32
      // allocation is implicitly zero and unused by the 4-segment load
      // above -- presumably sized for alignment; TODO confirm.
   109GLOBL	·chachaConst(SB), NOPTR|RODATA, $32
   110DATA	·chachaConst+0x00(SB)/4, $0x61707865
   111DATA	·chachaConst+0x04(SB)/4, $0x3320646e
   112DATA	·chachaConst+0x08(SB)/4, $0x79622d32
   113DATA	·chachaConst+0x0c(SB)/4, $0x6b206574

View as plain text