...
Run Format

Text file src/math/big/arith_loong64.s

Documentation: math/big

     1// Copyright 2025 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
     6
     7//go:build !math_big_pure_go
     8
     9#include "textflag.h"
    10
    11// func addVV(z, x, y []Word) (c Word)
      //
      // addVV computes z[i] = x[i] + y[i] + carry for i = 0 .. len(z)-1, word by
      // word from the lowest index, and returns the final carry in c.
      // Only len(z) is read; x and y are walked for that same number of words.
      //
      // Register roles:
      //	R4  count of 4-word groups still to do (len(z) >> 2)
      //	R5  cursor into x
      //	R6  cursor into y
      //	R7  cursor into z
      //	R8  count of leftover single words (len(z) & 3); reused for data in the 4X loop
      //	R28 running carry between words
      //	R30 scratch holding the carry out of the first partial add
      //
      // Each "ADCS ..." trailing comment marks the start of a five-instruction
      // add-with-carry sequence; the "..." comments are its continuation lines.
    12TEXT ·addVV(SB), NOSPLIT, $0
      	// Load the length of z and the three base pointers from the argument frame.
    13	MOVV z_len+8(FP), R4
    14	MOVV x_base+24(FP), R5
    15	MOVV y_base+48(FP), R6
    16	MOVV z_base+0(FP), R7
    17	// compute unrolled loop lengths
    18	AND $3, R4, R8
    19	SRLV $2, R4
    20	XOR R28, R28	// clear carry
      	// The len(z) & 3 leftover words are handled first, one per iteration;
      	// the loop is skipped entirely when there are none.
    21loop1:
    22	BEQ R8, loop1done
    23loop1cont:
    24	// unroll 1X
    25	MOVV 0(R5), R9
    26	MOVV 0(R6), R10
      	// R9 = x + y, and R30 = 1 if that sum wrapped (y > sum, unsigned).
      	// Then R9 += carry-in, and R28 = 1 if that add wrapped (carry-in > sum).
      	// The carry out of this word is R30 + R28.
    27	ADDVU R10, R9	// ADCS R10, R9, R9 (cr=R28)
    28	SGTU R10, R9, R30	// ...
    29	ADDVU R28, R9	// ...
    30	SGTU R28, R9, R28	// ...
    31	ADDVU R30, R28	// ...
    32	MOVV R9, 0(R7)
      	// Advance all three cursors by one 8-byte word.
    33	ADDVU $8, R5
    34	ADDVU $8, R6
    35	ADDVU $8, R7
    36	SUBVU $1, R8
    37	BNE R8, loop1cont
    38loop1done:
      	// Main loop: four words per iteration; skipped when no full group remains.
    39loop4:
    40	BEQ R4, loop4done
    41loop4cont:
    42	// unroll 4X
      	// Load four words of x into R8-R11 and four words of y into R12-R15.
    43	MOVV 0(R5), R8
    44	MOVV 8(R5), R9
    45	MOVV 16(R5), R10
    46	MOVV 24(R5), R11
    47	MOVV 0(R6), R12
    48	MOVV 8(R6), R13
    49	MOVV 16(R6), R14
    50	MOVV 24(R6), R15
      	// Four add-with-carry sequences in ascending word order; the carry in R28
      	// produced by each one feeds the next.
    51	ADDVU R12, R8	// ADCS R12, R8, R8 (cr=R28)
    52	SGTU R12, R8, R30	// ...
    53	ADDVU R28, R8	// ...
    54	SGTU R28, R8, R28	// ...
    55	ADDVU R30, R28	// ...
    56	ADDVU R13, R9	// ADCS R13, R9, R9 (cr=R28)
    57	SGTU R13, R9, R30	// ...
    58	ADDVU R28, R9	// ...
    59	SGTU R28, R9, R28	// ...
    60	ADDVU R30, R28	// ...
    61	ADDVU R14, R10	// ADCS R14, R10, R10 (cr=R28)
    62	SGTU R14, R10, R30	// ...
    63	ADDVU R28, R10	// ...
    64	SGTU R28, R10, R28	// ...
    65	ADDVU R30, R28	// ...
    66	ADDVU R15, R11	// ADCS R15, R11, R11 (cr=R28)
    67	SGTU R15, R11, R30	// ...
    68	ADDVU R28, R11	// ...
    69	SGTU R28, R11, R28	// ...
    70	ADDVU R30, R28	// ...
      	// Store the four sums to z, then advance the cursors by 32 bytes.
    71	MOVV R8, 0(R7)
    72	MOVV R9, 8(R7)
    73	MOVV R10, 16(R7)
    74	MOVV R11, 24(R7)
    75	ADDVU $32, R5
    76	ADDVU $32, R6
    77	ADDVU $32, R7
    78	SUBVU $1, R4
    79	BNE R4, loop4cont
    80loop4done:
      	// Return the carry out of the last word.
    81	MOVV R28, c+72(FP)
    82	RET
    83
    84// func subVV(z, x, y []Word) (c Word)
      //
      // subVV computes z[i] = x[i] - y[i] - borrow for i = 0 .. len(z)-1, word by
      // word from the lowest index, and returns the final borrow in c.
      // Only len(z) is read; x and y are walked for that same number of words.
      //
      // Register roles:
      //	R4  count of 4-word groups still to do (len(z) >> 2)
      //	R5  cursor into x
      //	R6  cursor into y
      //	R7  cursor into z
      //	R8  count of leftover single words (len(z) & 3); reused for data in the 4X loop
      //	R28 running borrow between words
      //	R30 scratch holding the borrow out of the first partial subtract
      //
      // Each "SBCS ..." trailing comment marks the start of a five-instruction
      // subtract-with-borrow sequence; the "..." comments are its continuation lines.
    85TEXT ·subVV(SB), NOSPLIT, $0
      	// Load the length of z and the three base pointers from the argument frame.
    86	MOVV z_len+8(FP), R4
    87	MOVV x_base+24(FP), R5
    88	MOVV y_base+48(FP), R6
    89	MOVV z_base+0(FP), R7
    90	// compute unrolled loop lengths
    91	AND $3, R4, R8
    92	SRLV $2, R4
    93	XOR R28, R28	// clear carry
      	// The len(z) & 3 leftover words are handled first, one per iteration;
      	// the loop is skipped entirely when there are none.
    94loop1:
    95	BEQ R8, loop1done
    96loop1cont:
    97	// unroll 1X
    98	MOVV 0(R5), R9
    99	MOVV 0(R6), R10
      	// R30 = 1 if removing the borrow-in from x will wrap (borrow-in > x),
      	// then R9 = x - borrow-in. R28 = 1 if removing y will wrap (y > R9),
      	// then R9 -= y. The borrow out of this word is R30 + R28.
      	// Each comparison is made before the subtraction it guards.
   100	SGTU R28, R9, R30	// SBCS R10, R9, R9
   101	SUBVU R28, R9	// ...
   102	SGTU R10, R9, R28	// ...
   103	SUBVU R10, R9	// ...
   104	ADDVU R30, R28	// ...
   105	MOVV R9, 0(R7)
      	// Advance all three cursors by one 8-byte word.
   106	ADDVU $8, R5
   107	ADDVU $8, R6
   108	ADDVU $8, R7
   109	SUBVU $1, R8
   110	BNE R8, loop1cont
   111loop1done:
      	// Main loop: four words per iteration; skipped when no full group remains.
   112loop4:
   113	BEQ R4, loop4done
   114loop4cont:
   115	// unroll 4X
      	// Load four words of x into R8-R11 and four words of y into R12-R15.
   116	MOVV 0(R5), R8
   117	MOVV 8(R5), R9
   118	MOVV 16(R5), R10
   119	MOVV 24(R5), R11
   120	MOVV 0(R6), R12
   121	MOVV 8(R6), R13
   122	MOVV 16(R6), R14
   123	MOVV 24(R6), R15
      	// Four subtract-with-borrow sequences in ascending word order; the borrow
      	// in R28 produced by each one feeds the next.
   124	SGTU R28, R8, R30	// SBCS R12, R8, R8
   125	SUBVU R28, R8	// ...
   126	SGTU R12, R8, R28	// ...
   127	SUBVU R12, R8	// ...
   128	ADDVU R30, R28	// ...
   129	SGTU R28, R9, R30	// SBCS R13, R9, R9
   130	SUBVU R28, R9	// ...
   131	SGTU R13, R9, R28	// ...
   132	SUBVU R13, R9	// ...
   133	ADDVU R30, R28	// ...
   134	SGTU R28, R10, R30	// SBCS R14, R10, R10
   135	SUBVU R28, R10	// ...
   136	SGTU R14, R10, R28	// ...
   137	SUBVU R14, R10	// ...
   138	ADDVU R30, R28	// ...
   139	SGTU R28, R11, R30	// SBCS R15, R11, R11
   140	SUBVU R28, R11	// ...
   141	SGTU R15, R11, R28	// ...
   142	SUBVU R15, R11	// ...
   143	ADDVU R30, R28	// ...
      	// Store the four differences to z, then advance the cursors by 32 bytes.
   144	MOVV R8, 0(R7)
   145	MOVV R9, 8(R7)
   146	MOVV R10, 16(R7)
   147	MOVV R11, 24(R7)
   148	ADDVU $32, R5
   149	ADDVU $32, R6
   150	ADDVU $32, R7
   151	SUBVU $1, R4
   152	BNE R4, loop4cont
   153loop4done:
      	// Return the borrow out of the last word.
   154	MOVV R28, c+72(FP)
   155	RET
   156
   157// func lshVU(z, x []Word, s uint) (c Word)
      //
      // lshVU computes z = x << s across len(z) words, working from the highest
      // word down to the lowest, and returns in c the bits shifted out of the top
      // word, x[len(z)-1] >> (64 - s). When len(z) is 0 it stores c = 0 and
      // returns without reading x or writing z.
      // NOTE(review): the code forms the complementary shift count 64 - s, so it
      // presumably relies on callers passing 0 < s < 64 — confirm against callers.
      //
      // Register roles:
      //	R4  word count, then count of 4-word groups still to do
      //	R5  s
      //	R6  cursor into x, starting just past the last word and moving down
      //	R7  cursor into z, likewise
      //	R8  byte offset during setup; afterwards the pending low part
      //	    (previous word << s) waiting to be merged into the next store
      //	R9  64 - s
      //	R10 count of leftover single words; reused for data in the 4X loop
   158TEXT ·lshVU(SB), NOSPLIT, $0
   159	MOVV z_len+8(FP), R4
   160	BEQ R4, ret0
   161	MOVV s+48(FP), R5
   162	MOVV x_base+24(FP), R6
   163	MOVV z_base+0(FP), R7
   164	// run loop backward
      	// Point R6 and R7 just past the last word of x and z (base + 8*len(z)).
      	// The byte offset is computed into R8 once for each pointer.
   165	SLLV $3, R4, R8
   166	ADDVU R8, R6
   167	SLLV $3, R4, R8
   168	ADDVU R8, R7
   169	// shift first word into carry
      	// R10 = top word >> (64 - s) is the return value; R8 = top word << s is
      	// kept as the pending part of the top output word.
   170	MOVV -8(R6), R8
   171	MOVV $64, R9
   172	SUBVU R5, R9
   173	SRLV R9, R8, R10
   174	SLLV R5, R8
   175	MOVV R10, c+56(FP)
   176	// shift remaining words
      	// One input word has been consumed already, so len(z)-1 remain.
   177	SUBVU $1, R4
   178	// compute unrolled loop lengths
   179	AND $3, R4, R10
   180	SRLV $2, R4
      	// Leftover words are handled first, one per iteration.
   181loop1:
   182	BEQ R10, loop1done
   183loop1cont:
   184	// unroll 1X
      	// Load the next lower input word w. The output word above it is
      	// (w >> (64 - s)) | pending; then pending becomes w << s.
   185	MOVV -16(R6), R11
   186	SRLV R9, R11, R12
   187	OR R8, R12
   188	SLLV R5, R11, R8
   189	MOVV R12, -8(R7)
      	// Step both cursors down by one 8-byte word.
   190	ADDVU $-8, R6
   191	ADDVU $-8, R7
   192	SUBVU $1, R10
   193	BNE R10, loop1cont
   194loop1done:
      	// Main loop: four words per iteration; skipped when no full group remains.
   195loop4:
   196	BEQ R4, loop4done
   197loop4cont:
   198	// unroll 4X
      	// Load the next four lower input words, highest first.
   199	MOVV -16(R6), R10
   200	MOVV -24(R6), R11
   201	MOVV -32(R6), R12
   202	MOVV -40(R6), R13
      	// Same merge step as the 1X loop, four times. Each result is written
      	// into a register whose input value has already been consumed
      	// (R14, then R10, R11, R12), and R8 carries the pending part along.
   203	SRLV R9, R10, R14
   204	OR R8, R14
   205	SLLV R5, R10, R8
   206	SRLV R9, R11, R10
   207	OR R8, R10
   208	SLLV R5, R11, R8
   209	SRLV R9, R12, R11
   210	OR R8, R11
   211	SLLV R5, R12, R8
   212	SRLV R9, R13, R12
   213	OR R8, R12
   214	SLLV R5, R13, R8
      	// Store the four output words, highest first, then step down 32 bytes.
   215	MOVV R14, -8(R7)
   216	MOVV R10, -16(R7)
   217	MOVV R11, -24(R7)
   218	MOVV R12, -32(R7)
   219	ADDVU $-32, R6
   220	ADDVU $-32, R7
   221	SUBVU $1, R4
   222	BNE R4, loop4cont
   223loop4done:
   224	// store final shifted bits
      	// R7 now sits one word above the base of z, so this writes the lowest
      	// output word, which has no lower neighbour to merge bits from.
   225	MOVV R8, -8(R7)
   226	RET
   227ret0:
      	// Empty z: the result is c = 0.
   228	MOVV R0, c+56(FP)
   229	RET
   230
   231// func rshVU(z, x []Word, s uint) (c Word)
      //
      // rshVU computes z = x >> s across len(z) words, working from the lowest
      // word up to the highest, and returns in c the bits shifted out of the
      // bottom word, x[0] << (64 - s). When len(z) is 0 it stores c = 0 and
      // returns without reading x or writing z.
      // NOTE(review): the code forms the complementary shift count 64 - s, so it
      // presumably relies on callers passing 0 < s < 64 — confirm against callers.
      //
      // Register roles:
      //	R4  word count, then count of 4-word groups still to do
      //	R5  s
      //	R6  cursor into x, moving up from the base
      //	R7  cursor into z, likewise
      //	R8  pending high part (previous word >> s) waiting to be merged
      //	    into the next store
      //	R9  64 - s
      //	R10 count of leftover single words; reused for data in the 4X loop
   232TEXT ·rshVU(SB), NOSPLIT, $0
   233	MOVV z_len+8(FP), R4
   234	BEQ R4, ret0
   235	MOVV s+48(FP), R5
   236	MOVV x_base+24(FP), R6
   237	MOVV z_base+0(FP), R7
   238	// shift first word into carry
      	// R10 = x[0] << (64 - s) is the return value; R8 = x[0] >> s is kept as
      	// the pending part of the lowest output word.
   239	MOVV 0(R6), R8
   240	MOVV $64, R9
   241	SUBVU R5, R9
   242	SLLV R9, R8, R10
   243	SRLV R5, R8
   244	MOVV R10, c+56(FP)
   245	// shift remaining words
      	// One input word has been consumed already, so len(z)-1 remain.
   246	SUBVU $1, R4
   247	// compute unrolled loop lengths
   248	AND $3, R4, R10
   249	SRLV $2, R4
      	// Leftover words are handled first, one per iteration.
   250loop1:
   251	BEQ R10, loop1done
   252loop1cont:
   253	// unroll 1X
      	// Load the next higher input word w. The output word below it is
      	// (w << (64 - s)) | pending; then pending becomes w >> s.
   254	MOVV 8(R6), R11
   255	SLLV R9, R11, R12
   256	OR R8, R12
   257	SRLV R5, R11, R8
   258	MOVV R12, 0(R7)
      	// Step both cursors up by one 8-byte word.
   259	ADDVU $8, R6
   260	ADDVU $8, R7
   261	SUBVU $1, R10
   262	BNE R10, loop1cont
   263loop1done:
      	// Main loop: four words per iteration; skipped when no full group remains.
   264loop4:
   265	BEQ R4, loop4done
   266loop4cont:
   267	// unroll 4X
      	// Load the next four higher input words, lowest first.
   268	MOVV 8(R6), R10
   269	MOVV 16(R6), R11
   270	MOVV 24(R6), R12
   271	MOVV 32(R6), R13
      	// Same merge step as the 1X loop, four times. Each result is written
      	// into a register whose input value has already been consumed
      	// (R14, then R10, R11, R12), and R8 carries the pending part along.
   272	SLLV R9, R10, R14
   273	OR R8, R14
   274	SRLV R5, R10, R8
   275	SLLV R9, R11, R10
   276	OR R8, R10
   277	SRLV R5, R11, R8
   278	SLLV R9, R12, R11
   279	OR R8, R11
   280	SRLV R5, R12, R8
   281	SLLV R9, R13, R12
   282	OR R8, R12
   283	SRLV R5, R13, R8
      	// Store the four output words, lowest first, then step up 32 bytes.
   284	MOVV R14, 0(R7)
   285	MOVV R10, 8(R7)
   286	MOVV R11, 16(R7)
   287	MOVV R12, 24(R7)
   288	ADDVU $32, R6
   289	ADDVU $32, R7
   290	SUBVU $1, R4
   291	BNE R4, loop4cont
   292loop4done:
   293	// store final shifted bits
      	// R7 now points at the highest output word, which has no higher
      	// neighbour to merge bits from.
   294	MOVV R8, 0(R7)
   295	RET
   296ret0:
      	// Empty z: the result is c = 0.
   297	MOVV R0, c+56(FP)
   298	RET
   299
   300// func mulAddVWW(z, x []Word, m, a Word) (c Word)
      //
      // mulAddVWW computes z = x*m + a over len(z) words. For each word, from the
      // lowest index up, it forms the 128-bit product x[i]*m, adds the running
      // carry word (initially a), stores the low 64 bits to z[i], and keeps the
      // high 64 bits as the carry into the next word. The last carry is returned
      // in c.
      //
      // Register roles:
      //	R4  m
      //	R5  running carry word (starts as a)
      //	R6  count of 4-word groups still to do (len(z) >> 2)
      //	R7  cursor into x
      //	R8  cursor into z
      //	R9  count of leftover single words (len(z) & 3); reused for data in the 4X loop
      //	R28 one-bit carry out of the low-half addition
   301TEXT ·mulAddVWW(SB), NOSPLIT, $0
   302	MOVV m+48(FP), R4
   303	MOVV a+56(FP), R5
   304	MOVV z_len+8(FP), R6
   305	MOVV x_base+24(FP), R7
   306	MOVV z_base+0(FP), R8
   307	// compute unrolled loop lengths
   308	AND $3, R6, R9
   309	SRLV $2, R6
      	// Leftover words are handled first, one per iteration.
   310loop1:
   311	BEQ R9, loop1done
   312loop1cont:
   313	// unroll 1X
   314	MOVV 0(R7), R10
   315	// synthetic carry, one column at a time
      	// R11 = low half and R12 = high half of the unsigned product m * x[i].
      	// R10 = low half + carry word; R28 = 1 if that add wrapped
      	// (carry word > sum). The new carry word is high half + R28.
   316	MULV R4, R10, R11
   317	MULHVU R4, R10, R12
   318	ADDVU R5, R11, R10	// ADDS R5, R11, R10 (cr=R28)
   319	SGTU R5, R10, R28	// ...
   320	ADDVU R28, R12, R5	// ADC $0, R12, R5
   321	MOVV R10, 0(R8)
      	// Advance both cursors by one 8-byte word.
   322	ADDVU $8, R7
   323	ADDVU $8, R8
   324	SUBVU $1, R9
   325	BNE R9, loop1cont
   326loop1done:
      	// Main loop: four words per iteration; skipped when no full group remains.
   327loop4:
   328	BEQ R6, loop4done
   329loop4cont:
   330	// unroll 4X
      	// Load four words of x into R9-R12; each is overwritten by its result.
   331	MOVV 0(R7), R9
   332	MOVV 8(R7), R10
   333	MOVV 16(R7), R11
   334	MOVV 24(R7), R12
   335	// synthetic carry, one column at a time
      	// Four copies of the 1X step in ascending word order. R13/R14 hold the
      	// low/high product halves, and the carry word in R5 links each column
      	// to the next.
   336	MULV R4, R9, R13
   337	MULHVU R4, R9, R14
   338	ADDVU R5, R13, R9	// ADDS R5, R13, R9 (cr=R28)
   339	SGTU R5, R9, R28	// ...
   340	ADDVU R28, R14, R5	// ADC $0, R14, R5
   341	MULV R4, R10, R13
   342	MULHVU R4, R10, R14
   343	ADDVU R5, R13, R10	// ADDS R5, R13, R10 (cr=R28)
   344	SGTU R5, R10, R28	// ...
   345	ADDVU R28, R14, R5	// ADC $0, R14, R5
   346	MULV R4, R11, R13
   347	MULHVU R4, R11, R14
   348	ADDVU R5, R13, R11	// ADDS R5, R13, R11 (cr=R28)
   349	SGTU R5, R11, R28	// ...
   350	ADDVU R28, R14, R5	// ADC $0, R14, R5
   351	MULV R4, R12, R13
   352	MULHVU R4, R12, R14
   353	ADDVU R5, R13, R12	// ADDS R5, R13, R12 (cr=R28)
   354	SGTU R5, R12, R28	// ...
   355	ADDVU R28, R14, R5	// ADC $0, R14, R5
      	// Store the four result words to z, then advance the cursors by 32 bytes.
   356	MOVV R9, 0(R8)
   357	MOVV R10, 8(R8)
   358	MOVV R11, 16(R8)
   359	MOVV R12, 24(R8)
   360	ADDVU $32, R7
   361	ADDVU $32, R8
   362	SUBVU $1, R6
   363	BNE R6, loop4cont
   364loop4done:
      	// Return the carry word left after the last column.
   365	MOVV R5, c+64(FP)
   366	RET
   367
   368// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
      //
      // addMulVVWW computes z = x + y*m + a over len(z) words. For each word, from
      // the lowest index up, it forms the 128-bit product y[i]*m, adds x[i] and
      // the running carry word (initially a), stores the low 64 bits to z[i], and
      // keeps the high 64 bits as the carry into the next word. The last carry is
      // returned in c.
      //
      // Register roles:
      //	R4  m
      //	R5  running carry word (starts as a)
      //	R6  count of 4-word groups still to do (len(z) >> 2)
      //	R7  cursor into x
      //	R8  cursor into y
      //	R9  cursor into z
      //	R10 count of leftover single words (len(z) & 3); reused for data in the 4X loop
      //	R28 one-bit carry out of each low-half addition
   369TEXT ·addMulVVWW(SB), NOSPLIT, $0
   370	MOVV m+72(FP), R4
   371	MOVV a+80(FP), R5
   372	MOVV z_len+8(FP), R6
   373	MOVV x_base+24(FP), R7
   374	MOVV y_base+48(FP), R8
   375	MOVV z_base+0(FP), R9
   376	// compute unrolled loop lengths
   377	AND $3, R6, R10
   378	SRLV $2, R6
      	// Leftover words are handled first, one per iteration.
   379loop1:
   380	BEQ R10, loop1done
   381loop1cont:
   382	// unroll 1X
   383	MOVV 0(R7), R11
   384	MOVV 0(R8), R12
   385	// synthetic carry, one column at a time
      	// R13 = low half and R14 = high half of the unsigned product m * y[i].
      	// First add x[i] to the low half, folding the wrap bit (x[i] > sum)
      	// into the high half. Then add the carry word to the low half the same
      	// way: R12 is the result word, and high half + wrap bit becomes the
      	// new carry word in R5.
   386	MULV R4, R12, R13
   387	MULHVU R4, R12, R14
   388	ADDVU R11, R13	// ADDS R11, R13, R13 (cr=R28)
   389	SGTU R11, R13, R28	// ...
   390	ADDVU R28, R14	// ADC $0, R14, R14
   391	ADDVU R5, R13, R12	// ADDS R5, R13, R12 (cr=R28)
   392	SGTU R5, R12, R28	// ...
   393	ADDVU R28, R14, R5	// ADC $0, R14, R5
   394	MOVV R12, 0(R9)
      	// Advance all three cursors by one 8-byte word.
   395	ADDVU $8, R7
   396	ADDVU $8, R8
   397	ADDVU $8, R9
   398	SUBVU $1, R10
   399	BNE R10, loop1cont
   400loop1done:
      	// Main loop: four words per iteration; skipped when no full group remains.
   401loop4:
   402	BEQ R6, loop4done
   403loop4cont:
   404	// unroll 4X
      	// Load four words of x into R10-R13 and four words of y into R14-R17;
      	// each y register is overwritten by the result word for its column.
   405	MOVV 0(R7), R10
   406	MOVV 8(R7), R11
   407	MOVV 16(R7), R12
   408	MOVV 24(R7), R13
   409	MOVV 0(R8), R14
   410	MOVV 8(R8), R15
   411	MOVV 16(R8), R16
   412	MOVV 24(R8), R17
   413	// synthetic carry, one column at a time
      	// Four copies of the 1X step in ascending word order. R18/R19 hold the
      	// low/high product halves, and the carry word in R5 links each column
      	// to the next.
   414	MULV R4, R14, R18
   415	MULHVU R4, R14, R19
   416	ADDVU R10, R18	// ADDS R10, R18, R18 (cr=R28)
   417	SGTU R10, R18, R28	// ...
   418	ADDVU R28, R19	// ADC $0, R19, R19
   419	ADDVU R5, R18, R14	// ADDS R5, R18, R14 (cr=R28)
   420	SGTU R5, R14, R28	// ...
   421	ADDVU R28, R19, R5	// ADC $0, R19, R5
   422	MULV R4, R15, R18
   423	MULHVU R4, R15, R19
   424	ADDVU R11, R18	// ADDS R11, R18, R18 (cr=R28)
   425	SGTU R11, R18, R28	// ...
   426	ADDVU R28, R19	// ADC $0, R19, R19
   427	ADDVU R5, R18, R15	// ADDS R5, R18, R15 (cr=R28)
   428	SGTU R5, R15, R28	// ...
   429	ADDVU R28, R19, R5	// ADC $0, R19, R5
   430	MULV R4, R16, R18
   431	MULHVU R4, R16, R19
   432	ADDVU R12, R18	// ADDS R12, R18, R18 (cr=R28)
   433	SGTU R12, R18, R28	// ...
   434	ADDVU R28, R19	// ADC $0, R19, R19
   435	ADDVU R5, R18, R16	// ADDS R5, R18, R16 (cr=R28)
   436	SGTU R5, R16, R28	// ...
   437	ADDVU R28, R19, R5	// ADC $0, R19, R5
   438	MULV R4, R17, R18
   439	MULHVU R4, R17, R19
   440	ADDVU R13, R18	// ADDS R13, R18, R18 (cr=R28)
   441	SGTU R13, R18, R28	// ...
   442	ADDVU R28, R19	// ADC $0, R19, R19
   443	ADDVU R5, R18, R17	// ADDS R5, R18, R17 (cr=R28)
   444	SGTU R5, R17, R28	// ...
   445	ADDVU R28, R19, R5	// ADC $0, R19, R5
      	// Store the four result words to z, then advance the cursors by 32 bytes.
   446	MOVV R14, 0(R9)
   447	MOVV R15, 8(R9)
   448	MOVV R16, 16(R9)
   449	MOVV R17, 24(R9)
   450	ADDVU $32, R7
   451	ADDVU $32, R8
   452	ADDVU $32, R9
   453	SUBVU $1, R6
   454	BNE R6, loop4cont
   455loop4done:
      	// Return the carry word left after the last column.
   456	MOVV R5, c+88(FP)
   457	RET

View as plain text