...
Run Format

Text file src/crypto/internal/fips140/sha512/sha512block_loong64.s

Documentation: crypto/internal/fips140/sha512

     1// Copyright 2024 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8
     9// SHA512 block routine. See sha512block.go for Go equivalent.
    10
// Scratch registers shared by the macros below.
//   REGTMP4 carries W[t] from LOAD0/LOAD1 into SHA512T1, then holds T1.
//   REGTMP1 holds T2 after SHA512T2.
//   REGTMP5 is R6, which block also uses for p_len before entering the loop.
    11#define REGTMP	R30
    12#define REGTMP1	R16
    13#define REGTMP2	R17
    14#define REGTMP3	R18
    15#define REGTMP4	R7
    16#define REGTMP5	R6
    17
    18// W[i] = M[i]; for 0 <= i <= 15
// LOAD0 reads the 64-bit word at byte offset index*8 from the input
// pointer R5, reverses its byte order with REVBV, and stores it at byte
// offset index*8 from R3, where the W buffer lives. The converted word
// is left in REGTMP4, which is where SHA512T1 expects W[t].
    19#define LOAD0(index) \
    20	MOVV	(index*8)(R5), REGTMP4; \
    21	REVBV	REGTMP4, REGTMP4; \
    22	MOVV	REGTMP4, (index*8)(R3)
    23
    24// W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 79
    25//   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    26//   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
// Only the last 16 schedule words are kept: every slot index is reduced
// with &0xf, so the buffer at R3 is used as a 16-entry ring and W[i] is
// written over the slot that held W[i-16] (after that word has been read).
// The new word is also left in REGTMP4, which is where SHA512T1 expects
// W[t]. Clobbers REGTMP, REGTMP1, REGTMP2, REGTMP3 and REGTMP5.
    27#define LOAD1(index) \
    28	MOVV	(((index-2)&0xf)*8)(R3), REGTMP4; \
    29	MOVV	(((index-15)&0xf)*8)(R3), REGTMP1; \
    30	MOVV	(((index-7)&0xf)*8)(R3), REGTMP; \
    31	MOVV	REGTMP4, REGTMP2; \
    32	MOVV	REGTMP4, REGTMP3; \
    33	ROTRV	$19, REGTMP4; \
    34	ROTRV	$61, REGTMP2; \
    35	SRLV	$6, REGTMP3; \
    36	XOR	REGTMP2, REGTMP4; \
    37	XOR	REGTMP3, REGTMP4; \
    38	ROTRV	$1, REGTMP1, REGTMP5; \
    39	SRLV	$7, REGTMP1, REGTMP2; \
    40	ROTRV	$8, REGTMP1; \
    41	ADDV	REGTMP, REGTMP4; \
    42	MOVV	(((index-16)&0xf)*8)(R3), REGTMP; \
    43	XOR	REGTMP1, REGTMP5; \
    44	XOR	REGTMP2, REGTMP5; \
    45	ADDV	REGTMP, REGTMP5; \
    46	ADDV	REGTMP5, REGTMP4; \
    47	MOVV	REGTMP4, ((index&0xf)*8)(R3)
    48
    49// h is also used as an accumulator. Wt is passed in REGTMP4.
    50//   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    51//     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
    52//     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    53//                 = ((y XOR z) AND x) XOR z
    54// Calculate T1 in REGTMP4
// const is the round constant K[i], added to h as an immediate.
// e, f and g are only read. h is modified in place: on exit it holds
// h + K[i] + W[i] + BIGSIGMA1(e), i.e. T1 without the Ch term, while
// REGTMP4 holds the complete T1.
// Clobbers REGTMP, REGTMP2, REGTMP3 and REGTMP5.
    55#define SHA512T1(const, e, f, g, h) \
    56	ADDV	$const, h; \
    57	ADDV	REGTMP4, h; \
    58	ROTRV	$14, e, REGTMP5; \
    59	ROTRV	$18, e, REGTMP; \
    60	ROTRV	$41, e, REGTMP3; \
    61	XOR	f, g, REGTMP2; \
    62	XOR	REGTMP, REGTMP5; \
    63	AND	e, REGTMP2; \
    64	XOR	REGTMP5, REGTMP3; \
    65	XOR	g, REGTMP2; \
    66	ADDV	REGTMP3, h; \
    67	ADDV	h, REGTMP2, REGTMP4
    68
    69// T2 = BIGSIGMA0(a) + Maj(a, b, c)
    70// BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
    71// Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
    72//              = ((y XOR z) AND x) XOR (y AND z)
    73// Calculate T2 in REGTMP1
// a, b and c are only read. Clobbers REGTMP, REGTMP2, REGTMP3 and
// REGTMP5. REGTMP4 is not written, so a T1 value produced by SHA512T1
// survives this macro.
    74#define SHA512T2(a, b, c) \
    75	ROTRV	$28, a, REGTMP5; \
    76	ROTRV	$34, a, REGTMP3; \
    77	ROTRV	$39, a, REGTMP2; \
    78	XOR	b, c, REGTMP; \
    79	AND	b, c, REGTMP1; \
    80	XOR	REGTMP3, REGTMP5; \
    81	AND	REGTMP, a, REGTMP; \
    82	XOR	REGTMP2, REGTMP5; \
    83	XOR	REGTMP, REGTMP1; \
    84	ADDV	REGTMP5, REGTMP1
    85
    86// Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
    87// The values for e and a are stored in d and h, ready for rotation.
// W[t] must already be in REGTMP4 when this macro starts.
// No register-to-register moves are made to shift the working variables:
// the call sites in block pass the eight registers shifted by one
// position on each successive round instead.
    88#define SHA512ROUND(const, a, b, c, d, e, f, g, h) \
    89	SHA512T1(const, e, f, g, h); \
    90	SHA512T2(a, b, c); \
    91	ADDV	REGTMP4, d; \
    92	ADDV	REGTMP1, REGTMP4, h
    93
// SHA512ROUND0 is the round body used for rounds 0-15: LOAD0 fetches
// W[index] from the input block into REGTMP4 (and the W buffer), then
// SHA512ROUND consumes it.
    94#define SHA512ROUND0(index, const, a, b, c, d, e, f, g, h) \
    95	LOAD0(index); \
    96	SHA512ROUND(const, a, b, c, d, e, f, g, h)
    97
// SHA512ROUND1 is the round body used for rounds 16-79: LOAD1 computes
// W[index] from earlier schedule words into REGTMP4 (and the W buffer),
// then SHA512ROUND consumes it.
    98#define SHA512ROUND1(index, const, a, b, c, d, e, f, g, h) \
    99	LOAD1(index); \
   100	SHA512ROUND(const, a, b, c, d, e, f, g, h)
   101
   102// A stack frame size of 128 bytes is required here, because
   103// the frame size used for data expansion is 128 bytes.
   104// See the definition of the macro LOAD1 above (8 bytes * 16 entries).
   105//
// block consumes p in 128-byte chunks and updates, in place, the eight
// 64-bit state words addressed by dig. p_len is rounded down to a
// multiple of 128; if the result is zero the routine returns before
// reading dig.
//
// Register use:
//   R4      dig
//   R5      current read position in p
//   R25     address just past the last whole 128-byte chunk
//   R8-R15  working variables a-h
//   R3      base address used by LOAD0/LOAD1 for the 16-entry W buffer
//
   106// func block(dig *Digest, p []byte)
   107TEXT ·block(SB),NOSPLIT,$128-32
   108	MOVV	p_len+16(FP), R6
   109	MOVV	p_base+8(FP), R5
	// Round the length down to a multiple of 128 and leave if nothing remains.
   110	AND	$~127, R6
   111	BEQ	R6, end
   112
   113	// p_len >= 128
   114	MOVV	dig+0(FP), R4
	// R25 = p_base + rounded length; the loop stops when R5 reaches it.
	// R6 is free after this and is reused by the macros as REGTMP5.
   115	ADDV	R5, R6, R25
   116	MOVV	(0*8)(R4), R8	// a = H0
   117	MOVV	(1*8)(R4), R9	// b = H1
   118	MOVV	(2*8)(R4), R10	// c = H2
   119	MOVV	(3*8)(R4), R11	// d = H3
   120	MOVV	(4*8)(R4), R12	// e = H4
   121	MOVV	(5*8)(R4), R13	// f = H5
   122	MOVV	(6*8)(R4), R14	// g = H6
   123	MOVV	(7*8)(R4), R15	// h = H7
   124
   125loop:
	// Rounds 0-15: W[t] comes straight from the input chunk at R5.
	// The register arguments shift by one position per round and repeat
	// with a period of eight rounds.
   126	SHA512ROUND0( 0, 0x428a2f98d728ae22, R8,  R9,  R10, R11, R12, R13, R14, R15)
   127	SHA512ROUND0( 1, 0x7137449123ef65cd, R15, R8,  R9,  R10, R11, R12, R13, R14)
   128	SHA512ROUND0( 2, 0xb5c0fbcfec4d3b2f, R14, R15, R8,  R9,  R10, R11, R12, R13)
   129	SHA512ROUND0( 3, 0xe9b5dba58189dbbc, R13, R14, R15, R8,  R9,  R10, R11, R12)
   130	SHA512ROUND0( 4, 0x3956c25bf348b538, R12, R13, R14, R15, R8,  R9,  R10, R11)
   131	SHA512ROUND0( 5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8,  R9,  R10)
   132	SHA512ROUND0( 6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8,  R9)
   133	SHA512ROUND0( 7, 0xab1c5ed5da6d8118, R9,  R10, R11, R12, R13, R14, R15, R8)
   134	SHA512ROUND0( 8, 0xd807aa98a3030242, R8,  R9,  R10, R11, R12, R13, R14, R15)
   135	SHA512ROUND0( 9, 0x12835b0145706fbe, R15, R8,  R9,  R10, R11, R12, R13, R14)
   136	SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8,  R9,  R10, R11, R12, R13)
   137	SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8,  R9,  R10, R11, R12)
   138	SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8,  R9,  R10, R11)
   139	SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8,  R9,  R10)
   140	SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8,  R9)
   141	SHA512ROUND0(15, 0xc19bf174cf692694, R9,  R10, R11, R12, R13, R14, R15, R8)
   142
	// Rounds 16-79: W[t] is computed from the 16 most recent schedule words.
   143	SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8,  R9,  R10, R11, R12, R13, R14, R15)
   144	SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8,  R9,  R10, R11, R12, R13, R14)
   145	SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8,  R9,  R10, R11, R12, R13)
   146	SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8,  R9,  R10, R11, R12)
   147	SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8,  R9,  R10, R11)
   148	SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8,  R9,  R10)
   149	SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8,  R9)
   150	SHA512ROUND1(23, 0x76f988da831153b5, R9,  R10, R11, R12, R13, R14, R15, R8)
   151	SHA512ROUND1(24, 0x983e5152ee66dfab, R8,  R9,  R10, R11, R12, R13, R14, R15)
   152	SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8,  R9,  R10, R11, R12, R13, R14)
   153	SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8,  R9,  R10, R11, R12, R13)
   154	SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8,  R9,  R10, R11, R12)
   155	SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8,  R9,  R10, R11)
   156	SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8,  R9,  R10)
   157	SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8,  R9)
   158	SHA512ROUND1(31, 0x142929670a0e6e70, R9,  R10, R11, R12, R13, R14, R15, R8)
   159	SHA512ROUND1(32, 0x27b70a8546d22ffc, R8,  R9,  R10, R11, R12, R13, R14, R15)
   160	SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8,  R9,  R10, R11, R12, R13, R14)
   161	SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8,  R9,  R10, R11, R12, R13)
   162	SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8,  R9,  R10, R11, R12)
   163	SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8,  R9,  R10, R11)
   164	SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8,  R9,  R10)
   165	SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8,  R9)
   166	SHA512ROUND1(39, 0x92722c851482353b, R9,  R10, R11, R12, R13, R14, R15, R8)
   167	SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8,  R9,  R10, R11, R12, R13, R14, R15)
   168	SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8,  R9,  R10, R11, R12, R13, R14)
   169	SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8,  R9,  R10, R11, R12, R13)
   170	SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8,  R9,  R10, R11, R12)
   171	SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8,  R9,  R10, R11)
   172	SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8,  R9,  R10)
   173	SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8,  R9)
   174	SHA512ROUND1(47, 0x106aa07032bbd1b8, R9,  R10, R11, R12, R13, R14, R15, R8)
   175	SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8,  R9,  R10, R11, R12, R13, R14, R15)
   176	SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8,  R9,  R10, R11, R12, R13, R14)
   177	SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8,  R9,  R10, R11, R12, R13)
   178	SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8,  R9,  R10, R11, R12)
   179	SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8,  R9,  R10, R11)
   180	SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   181	SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8,  R9)
   182	SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9,  R10, R11, R12, R13, R14, R15, R8)
   183	SHA512ROUND1(56, 0x748f82ee5defb2fc, R8,  R9,  R10, R11, R12, R13, R14, R15)
   184	SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8,  R9,  R10, R11, R12, R13, R14)
   185	SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8,  R9,  R10, R11, R12, R13)
   186	SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8,  R9,  R10, R11, R12)
   187	SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8,  R9,  R10, R11)
   188	SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8,  R9,  R10)
   189	SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8,  R9)
   190	SHA512ROUND1(63, 0xc67178f2e372532b, R9,  R10, R11, R12, R13, R14, R15, R8)
   191	SHA512ROUND1(64, 0xca273eceea26619c, R8,  R9,  R10, R11, R12, R13, R14, R15)
   192	SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8,  R9,  R10, R11, R12, R13, R14)
   193	SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8,  R9,  R10, R11, R12, R13)
   194	SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8,  R9,  R10, R11, R12)
   195	SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8,  R9,  R10, R11)
   196	SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8,  R9,  R10)
   197	SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8,  R9)
   198	SHA512ROUND1(71, 0x1b710b35131c471b, R9,  R10, R11, R12, R13, R14, R15, R8)
   199	SHA512ROUND1(72, 0x28db77f523047d84, R8,  R9,  R10, R11, R12, R13, R14, R15)
   200	SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8,  R9,  R10, R11, R12, R13, R14)
   201	SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8,  R9,  R10, R11, R12, R13)
   202	SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8,  R9,  R10, R11, R12)
   203	SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8,  R9,  R10, R11)
   204	SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8,  R9,  R10)
   205	SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8,  R9)
   206	SHA512ROUND1(79, 0x6c44198c4a475817, R9,  R10, R11, R12, R13, R14, R15, R8)
   207
	// 80 rounds is a multiple of the eight-round rotation period, so
	// R8-R15 hold a-h in their original order again. Add them into the
	// state at R4, four words at a time, and write the sums back.
   208	MOVV	(0*8)(R4), REGTMP
   209	MOVV	(1*8)(R4), REGTMP1
   210	MOVV	(2*8)(R4), REGTMP2
   211	MOVV	(3*8)(R4), REGTMP3
   212	ADDV	REGTMP, R8	// H0 = a + H0
   213	ADDV	REGTMP1, R9	// H1 = b + H1
   214	ADDV	REGTMP2, R10	// H2 = c + H2
   215	ADDV	REGTMP3, R11	// H3 = d + H3
   216	MOVV	R8, (0*8)(R4)
   217	MOVV	R9, (1*8)(R4)
   218	MOVV	R10, (2*8)(R4)
   219	MOVV	R11, (3*8)(R4)
   220	MOVV	(4*8)(R4), REGTMP
   221	MOVV	(5*8)(R4), REGTMP1
   222	MOVV	(6*8)(R4), REGTMP2
   223	MOVV	(7*8)(R4), REGTMP3
   224	ADDV	REGTMP, R12	// H4 = e + H4
   225	ADDV	REGTMP1, R13	// H5 = f + H5
   226	ADDV	REGTMP2, R14	// H6 = g + H6
   227	ADDV	REGTMP3, R15	// H7 = h + H7
   228	MOVV	R12, (4*8)(R4)
   229	MOVV	R13, (5*8)(R4)
   230	MOVV	R14, (6*8)(R4)
   231	MOVV	R15, (7*8)(R4)
   232
	// Advance to the next 128-byte chunk. R8-R15 already hold the updated
	// state, so the loop continues without reloading it from dig.
   233	ADDV	$128, R5
   234	BNE	R5, R25, loop
   235
   236end:
   237	RET

View as plain text