...
Run Format

Text file src/crypto/internal/fips140/sha256/sha256block_loong64.s

Documentation: crypto/internal/fips140/sha256

     1// Copyright 2024 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8
     9// SHA256 block routine. See sha256block.go for Go equivalent.
    10//
    11// The algorithm is detailed in FIPS 180-4:
    12//
    13//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    14//
    15// W[i] = M[i]; for 0 <= i <= 15
    16// W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
    17//
    18// a = H0
    19// b = H1
    20// c = H2
    21// d = H3
    22// e = H4
    23// f = H5
    24// g = H6
    25// h = H7
    26//
    27// for i = 0 to 63 {
    28//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + K[i] + W[i]
    29//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    30//    h = g
    31//    g = f
    32//    f = e
    33//    e = d + T1
    34//    d = c
    35//    c = b
    36//    b = a
    37//    a = T1 + T2
    38// }
    39//
    40// H0 = a + H0
    41// H1 = b + H1
    42// H2 = c + H2
    43// H3 = d + H3
    44// H4 = e + H4
    45// H5 = f + H5
    46// H6 = g + H6
    47// H7 = h + H7
    48
      // Scratch registers used by the macros below. None of them overlaps the
      // R8-R15 working variables or the R3/R4/R5/R25 values that ·block keeps
      // live across rounds.
      //   REGTMP4 carries W[i] out of LOAD0/LOAD1 into SHA256T1, then holds T1.
      //   REGTMP1 holds T2 after SHA256T2.
      //   REGTMP5 is R6, which ·block also uses for p_len before the round loop.
      // NOTE(review): R30 is presumably also the assembler's own temporary on
      // loong64 (hence the name REGTMP) - confirm before keeping a value live
      // in it across an instruction that takes a large immediate.
    49#define REGTMP	R30
    50#define REGTMP1	R16
    51#define REGTMP2	R17
    52#define REGTMP3	R18
    53#define REGTMP4	R7
    54#define REGTMP5	R6
    55
    56// W[i] = M[i]; for 0 <= i <= 15
      //
      // LOAD0 reads the 32-bit word at byte offset index*4 from R5 (the current
      // message block), reverses its byte order with REVB2W and stores it at
      // byte offset index*4 from R3 (the W[] scratch area).
      // The converted word is also left in REGTMP4, where SHA256T1 expects W[i].
      // Clobbers: REGTMP4.
    57#define LOAD0(index) \
    58	MOVW	(index*4)(R5), REGTMP4; \
    59	REVB2W	REGTMP4, REGTMP4; \
    60	MOVW	REGTMP4, (index*4)(R3)
    61
    62// W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
    63//   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
    64//   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
      //
      // The schedule is kept as a 16-entry ring of 32-bit words addressed off R3:
      // every W[] reference is reduced with &0xf, and the new W[i] overwrites the
      // slot that held W[i-16] (the same slot, since (index-16)&0xf == index&0xf).
      // The new W[i] is also left in REGTMP4, where SHA256T1 expects it.
      // SIGMA1(W[i-2]) is built in REGTMP4 and SIGMA0(W[i-15]) in REGTMP5.
      // Clobbers: REGTMP, REGTMP1, REGTMP2, REGTMP3, REGTMP4, REGTMP5.
    65#define LOAD1(index) \
    66	MOVW	(((index-2)&0xf)*4)(R3), REGTMP4; \
    67	MOVW	(((index-15)&0xf)*4)(R3), REGTMP1; \
    68	MOVW	(((index-7)&0xf)*4)(R3), REGTMP; \
    69	MOVW	REGTMP4, REGTMP2; \
    70	MOVW	REGTMP4, REGTMP3; \
    71	ROTR	$17, REGTMP4; \
    72	ROTR	$19, REGTMP2; \
    73	SRL	$10, REGTMP3; \
    74	XOR	REGTMP2, REGTMP4; \
    75	XOR	REGTMP3, REGTMP4; \
    76	ROTR	$7, REGTMP1, REGTMP5; \
    77	SRL	$3, REGTMP1, REGTMP3; \
    78	ROTR	$18, REGTMP1, REGTMP2; \
    79	ADD	REGTMP, REGTMP4; \
    80	MOVW	(((index-16)&0xf)*4)(R3), REGTMP; \
    81	XOR	REGTMP3, REGTMP5; \
    82	XOR	REGTMP2, REGTMP5; \
    83	ADD	REGTMP, REGTMP5; \
    84	ADD	REGTMP5, REGTMP4; \
    85	MOVW	REGTMP4, ((index&0xf)*4)(R3)
    86
    87// T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    88// BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
    89// Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    90//             = ((y XOR z) AND x) XOR z
    91// Calculate T1 in REGTMP4
      //
      // On entry REGTMP4 holds W[i] and const is K[i]. h is used as the
      // accumulator (h += K[i] + W[i] + BIGSIGMA1(e)), so its previous value is
      // destroyed; e, f and g are only read.
      // BIGSIGMA1(e) is assembled in REGTMP3 and Ch(e, f, g) in REGTMP2.
      // Clobbers: h, REGTMP, REGTMP2, REGTMP3, REGTMP4, REGTMP5.
      // REGTMP1 is not touched.
    92#define SHA256T1(const, e, f, g, h) \
    93	ADDV	$const, h; \
    94	ADD	REGTMP4, h; \
    95	ROTR	$6, e, REGTMP5; \
    96	ROTR	$11, e, REGTMP; \
    97	ROTR	$25, e, REGTMP3; \
    98	XOR	f, g, REGTMP2; \
    99	XOR	REGTMP, REGTMP5; \
   100	AND	e, REGTMP2; \
   101	XOR	REGTMP5, REGTMP3; \
   102	XOR	g, REGTMP2; \
   103	ADD	REGTMP3, h; \
   104	ADD	h, REGTMP2, REGTMP4
   105
   106// T2 = BIGSIGMA0(a) + Maj(a, b, c)
   107// BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
   108// Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
   109//              = ((y XOR z) AND x) XOR (y AND z)
   110// Calculate T2 in REGTMP1
      //
      // a, b and c are only read. BIGSIGMA0(a) is assembled in REGTMP5 and
      // Maj(a, b, c) in REGTMP1 before the two are added.
      // REGTMP4 is not touched, so the T1 left there by SHA256T1 is still
      // available afterwards.
      // Clobbers: REGTMP, REGTMP1, REGTMP2, REGTMP3, REGTMP5.
   111#define SHA256T2(a, b, c) \
   112	ROTR	$2, a, REGTMP5; \
   113	ROTR	$13, a, REGTMP3; \
   114	ROTR	$22, a, REGTMP2; \
   115	XOR	b, c, REGTMP; \
   116	AND	b, c, REGTMP1; \
   117	XOR	REGTMP3, REGTMP5; \
   118	AND	REGTMP, a, REGTMP; \
   119	XOR	REGTMP2, REGTMP5; \
   120	XOR	REGTMP, REGTMP1; \
   121	ADD	REGTMP5, REGTMP1
   122
   123// Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
   124// The values for e and a are stored in d and h, ready for rotation.
      //
      // W[i] must already be in REGTMP4 (LOAD0 and LOAD1 leave it there).
      // No register moves are done for the h = g, g = f, ... shuffle of the
      // algorithm; instead ·block passes the eight registers shifted by one
      // position on each successive round.
      // SHA256T2 must run after SHA256T1 and before the two ADDs: it produces
      // T2 in REGTMP1 while leaving T1 in REGTMP4.
   125#define SHA256ROUND(const, a, b, c, d, e, f, g, h) \
   126	SHA256T1(const, e, f, g, h); \
   127	SHA256T2(a, b, c); \
   128	ADD	REGTMP4, d; \
   129	ADD	REGTMP1, REGTMP4, h
   130
      // One round for 0 <= index <= 15: LOAD0 fetches W[index] from the message
      // block into REGTMP4 (and the W[] scratch area), then SHA256ROUND runs
      // with const as K[index].
   131#define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
   132	LOAD0(index); \
   133	SHA256ROUND(const, a, b, c, d, e, f, g, h)
   134
      // One round for 16 <= index <= 63: LOAD1 derives W[index] from earlier
      // schedule words into REGTMP4 (and the W[] scratch area), then
      // SHA256ROUND runs with const as K[index].
   135#define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
   136	LOAD1(index); \
   137	SHA256ROUND(const, a, b, c, d, e, f, g, h)
   138
   139// A stack frame size of 64 bytes is required here, because
   140// the frame size used for data expansion is 64 bytes.
   141// See the definition of the macro LOAD1 above (4 bytes * 16 entries).
   142//
      // block hashes every complete 64-byte chunk of p into dig. The length is
      // rounded down to a multiple of 64, so trailing bytes are not processed,
      // and the routine returns without touching dig when no full chunk exists.
      //
      // Register use:
      //   R4      dig; the eight 32-bit hash words are read and written at 0..28(R4)
      //   R5      pointer to the current 64-byte chunk of p
      //   R25     end pointer: p_base + rounded length
      //   R8-R15  working variables a..h
      //   R6      rounded length until R25 is computed; it is REGTMP5 afterwards
      //
   143//func block(dig *Digest, p []byte)
   144TEXT ·block(SB),NOSPLIT,$64-32
   145	MOVV	p_base+8(FP), R5	// R5 = start of p
   146	MOVV	p_len+16(FP), R6	// R6 = len(p)
   147	AND	$~63, R6	// round the length down to a multiple of 64
   148	BEQ	R6, end	// no complete chunk: nothing to do
   149
   150	// p_len >= 64
   151	MOVV	dig+0(FP), R4
   152	ADDV	R5, R6, R25	// R25 = end of the last complete chunk; R6 is free from here on
   153	MOVW	(0*4)(R4), R8	// a = H0
   154	MOVW	(1*4)(R4), R9	// b = H1
   155	MOVW	(2*4)(R4), R10	// c = H2
   156	MOVW	(3*4)(R4), R11	// d = H3
   157	MOVW	(4*4)(R4), R12	// e = H4
   158	MOVW	(5*4)(R4), R13	// f = H5
   159	MOVW	(6*4)(R4), R14	// g = H6
   160	MOVW	(7*4)(R4), R15	// h = H7
   161
   162loop:
      	// Rounds 0-15 take W[i] straight from the message. Each round passes
      	// R8-R15 shifted by one position, so the assignment of a..h to
      	// registers repeats every 8 rounds.
   163	SHA256ROUND0(0,  0x428a2f98, R8,  R9,  R10, R11, R12, R13, R14, R15)
   164	SHA256ROUND0(1,  0x71374491, R15, R8,  R9,  R10, R11, R12, R13, R14)
   165	SHA256ROUND0(2,  0xb5c0fbcf, R14, R15, R8,  R9,  R10, R11, R12, R13)
   166	SHA256ROUND0(3,  0xe9b5dba5, R13, R14, R15, R8,  R9,  R10, R11, R12)
   167	SHA256ROUND0(4,  0x3956c25b, R12, R13, R14, R15, R8,  R9,  R10, R11)
   168	SHA256ROUND0(5,  0x59f111f1, R11, R12, R13, R14, R15, R8,  R9,  R10)
   169	SHA256ROUND0(6,  0x923f82a4, R10, R11, R12, R13, R14, R15, R8,  R9)
   170	SHA256ROUND0(7,  0xab1c5ed5, R9,  R10, R11, R12, R13, R14, R15, R8)
   171	SHA256ROUND0(8,  0xd807aa98, R8,  R9,  R10, R11, R12, R13, R14, R15)
   172	SHA256ROUND0(9,  0x12835b01, R15, R8,  R9,  R10, R11, R12, R13, R14)
   173	SHA256ROUND0(10, 0x243185be, R14, R15, R8,  R9,  R10, R11, R12, R13)
   174	SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8,  R9,  R10, R11, R12)
   175	SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8,  R9,  R10, R11)
   176	SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8,  R9,  R10)
   177	SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8,  R9)
   178	SHA256ROUND0(15, 0xc19bf174, R9,  R10, R11, R12, R13, R14, R15, R8)
   179
      	// Rounds 16-63 compute W[i] from the 16-entry schedule ring (LOAD1).
   180	SHA256ROUND1(16, 0xe49b69c1, R8,  R9,  R10, R11, R12, R13, R14, R15)
   181	SHA256ROUND1(17, 0xefbe4786, R15, R8,  R9,  R10, R11, R12, R13, R14)
   182	SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8,  R9,  R10, R11, R12, R13)
   183	SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8,  R9,  R10, R11, R12)
   184	SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8,  R9,  R10, R11)
   185	SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8,  R9,  R10)
   186	SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8,  R9)
   187	SHA256ROUND1(23, 0x76f988da, R9,  R10, R11, R12, R13, R14, R15, R8)
   188	SHA256ROUND1(24, 0x983e5152, R8,  R9,  R10, R11, R12, R13, R14, R15)
   189	SHA256ROUND1(25, 0xa831c66d, R15, R8,  R9,  R10, R11, R12, R13, R14)
   190	SHA256ROUND1(26, 0xb00327c8, R14, R15, R8,  R9,  R10, R11, R12, R13)
   191	SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8,  R9,  R10, R11, R12)
   192	SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8,  R9,  R10, R11)
   193	SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8,  R9,  R10)
   194	SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8,  R9)
   195	SHA256ROUND1(31, 0x14292967, R9,  R10, R11, R12, R13, R14, R15, R8)
   196	SHA256ROUND1(32, 0x27b70a85, R8,  R9,  R10, R11, R12, R13, R14, R15)
   197	SHA256ROUND1(33, 0x2e1b2138, R15, R8,  R9,  R10, R11, R12, R13, R14)
   198	SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8,  R9,  R10, R11, R12, R13)
   199	SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8,  R9,  R10, R11, R12)
   200	SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8,  R9,  R10, R11)
   201	SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   202	SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8,  R9)
   203	SHA256ROUND1(39, 0x92722c85, R9,  R10, R11, R12, R13, R14, R15, R8)
   204	SHA256ROUND1(40, 0xa2bfe8a1, R8,  R9,  R10, R11, R12, R13, R14, R15)
   205	SHA256ROUND1(41, 0xa81a664b, R15, R8,  R9,  R10, R11, R12, R13, R14)
   206	SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8,  R9,  R10, R11, R12, R13)
   207	SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8,  R9,  R10, R11, R12)
   208	SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8,  R9,  R10, R11)
   209	SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8,  R9,  R10)
   210	SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8,  R9)
   211	SHA256ROUND1(47, 0x106aa070, R9,  R10, R11, R12, R13, R14, R15, R8)
   212	SHA256ROUND1(48, 0x19a4c116, R8,  R9,  R10, R11, R12, R13, R14, R15)
   213	SHA256ROUND1(49, 0x1e376c08, R15, R8,  R9,  R10, R11, R12, R13, R14)
   214	SHA256ROUND1(50, 0x2748774c, R14, R15, R8,  R9,  R10, R11, R12, R13)
   215	SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8,  R9,  R10, R11, R12)
   216	SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8,  R9,  R10, R11)
   217	SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8,  R9,  R10)
   218	SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8,  R9)
   219	SHA256ROUND1(55, 0x682e6ff3, R9,  R10, R11, R12, R13, R14, R15, R8)
   220	SHA256ROUND1(56, 0x748f82ee, R8,  R9,  R10, R11, R12, R13, R14, R15)
   221	SHA256ROUND1(57, 0x78a5636f, R15, R8,  R9,  R10, R11, R12, R13, R14)
   222	SHA256ROUND1(58, 0x84c87814, R14, R15, R8,  R9,  R10, R11, R12, R13)
   223	SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8,  R9,  R10, R11, R12)
   224	SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8,  R9,  R10, R11)
   225	SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   226	SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8,  R9)
   227	SHA256ROUND1(63, 0xc67178f2, R9,  R10, R11, R12, R13, R14, R15, R8)
   228
      	// After 64 rounds (a multiple of 8) a..h are back in R8..R15.
      	// Add the previous hash words from dig and store the sums. The sums
      	// stay in R8-R15, so they are the starting a..h of the next chunk
      	// without being reloaded.
   229	MOVW	(0*4)(R4), REGTMP
   230	MOVW	(1*4)(R4), REGTMP1
   231	MOVW	(2*4)(R4), REGTMP2
   232	MOVW	(3*4)(R4), REGTMP3
   233	ADD	REGTMP, R8	// H0 = a + H0
   234	ADD	REGTMP1, R9	// H1 = b + H1
   235	ADD	REGTMP2, R10	// H2 = c + H2
   236	ADD	REGTMP3, R11	// H3 = d + H3
   237	MOVW	R8, (0*4)(R4)
   238	MOVW	R9, (1*4)(R4)
   239	MOVW	R10, (2*4)(R4)
   240	MOVW	R11, (3*4)(R4)
   241	MOVW	(4*4)(R4), REGTMP
   242	MOVW	(5*4)(R4), REGTMP1
   243	MOVW	(6*4)(R4), REGTMP2
   244	MOVW	(7*4)(R4), REGTMP3
   245	ADD	REGTMP, R12	// H4 = e + H4
   246	ADD	REGTMP1, R13	// H5 = f + H5
   247	ADD	REGTMP2, R14	// H6 = g + H6
   248	ADD	REGTMP3, R15	// H7 = h + H7
   249	MOVW	R12, (4*4)(R4)
   250	MOVW	R13, (5*4)(R4)
   251	MOVW	R14, (6*4)(R4)
   252	MOVW	R15, (7*4)(R4)
   253
   254	ADDV	$64, R5	// advance to the next 64-byte chunk
   255	BNE	R5, R25, loop	// repeat until the end pointer is reached
   256
   257end:
   258	RET

View as plain text