...
Run Format

Text file src/math/big/arith_mips64x.s

Documentation: math/big

     1// Copyright 2025 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
     6
     7//go:build !math_big_pure_go && (mips64 || mips64le)
     8
     9#include "textflag.h"
    10
    11// func addVV(z, x, y []Word) (c Word)
      //
      // addVV adds x and y word by word, lowest address first, storing each
      // sum (plus the running carry) into z and returning the final carry in c.
      // Only len(z) is read; the lengths of x and y are never loaded, so the
      // caller presumably guarantees they hold at least len(z) words (TODO confirm).
      //
      // Register use:
      //	R1         = len(z)/4, trip count of the 4X loop
      //	R2, R3, R4 = cursors into x, y, z
      //	R5         = len(z)%4, trip count of the 1X loop (reused for data in the 4X loop)
      //	R23        = carry out of the x+y addition of the current word
      //	R24        = running carry between words
    12TEXT ·addVV(SB), NOSPLIT, $0
    13	MOVV z_len+8(FP), R1	// R1 = len(z)
    14	MOVV x_base+24(FP), R2	// R2 = &x[0]
    15	MOVV y_base+48(FP), R3	// R3 = &y[0]
    16	MOVV z_base+0(FP), R4	// R4 = &z[0]
    17	// compute unrolled loop lengths
    18	AND $3, R1, R5	// R5 = len(z) & 3: words handled one at a time
    19	SRLV $2, R1	// R1 = len(z) >> 2: groups of four words
    20	XOR R24, R24	// clear carry
    21loop1:
    22	BEQ R5, loop1done	// no leftover words: skip the 1X loop
    23loop1cont:
    24	// unroll 1X
    25	MOVV 0(R2), R6	// R6 = next word of x
    26	MOVV 0(R3), R7	// R7 = next word of y
    27	ADDVU R7, R6	// ADCS R7, R6, R6 (cr=R24)
    28	SGTU R7, R6, R23	// ... R23 = 1 if x+y wrapped (sum < y)
    29	ADDVU R24, R6	// ... add the incoming carry
    30	SGTU R24, R6, R24	// ... R24 = 1 if adding the carry wrapped
    31	ADDVU R23, R24	// ... outgoing carry = sum of the two wrap flags
    32	MOVV R6, 0(R4)	// store result word to z
    33	ADDVU $8, R2	// advance all three cursors by one 8-byte word
    34	ADDVU $8, R3
    35	ADDVU $8, R4
    36	SUBVU $1, R5
    37	BNE R5, loop1cont
    38loop1done:
    39loop4:
    40	BEQ R1, loop4done	// no groups of four: go straight to the return
    41loop4cont:
    42	// unroll 4X
      	// Load four words of x (R5-R8) and four of y (R9-R12), then run the
      	// same add-with-carry sequence as the 1X loop once per word, with
      	// R24 threading the carry from each word to the next.
    43	MOVV 0(R2), R5
    44	MOVV 8(R2), R6
    45	MOVV 16(R2), R7
    46	MOVV 24(R2), R8
    47	MOVV 0(R3), R9
    48	MOVV 8(R3), R10
    49	MOVV 16(R3), R11
    50	MOVV 24(R3), R12
    51	ADDVU R9, R5	// ADCS R9, R5, R5 (cr=R24)
    52	SGTU R9, R5, R23	// ...
    53	ADDVU R24, R5	// ...
    54	SGTU R24, R5, R24	// ...
    55	ADDVU R23, R24	// ...
    56	ADDVU R10, R6	// ADCS R10, R6, R6 (cr=R24)
    57	SGTU R10, R6, R23	// ...
    58	ADDVU R24, R6	// ...
    59	SGTU R24, R6, R24	// ...
    60	ADDVU R23, R24	// ...
    61	ADDVU R11, R7	// ADCS R11, R7, R7 (cr=R24)
    62	SGTU R11, R7, R23	// ...
    63	ADDVU R24, R7	// ...
    64	SGTU R24, R7, R24	// ...
    65	ADDVU R23, R24	// ...
    66	ADDVU R12, R8	// ADCS R12, R8, R8 (cr=R24)
    67	SGTU R12, R8, R23	// ...
    68	ADDVU R24, R8	// ...
    69	SGTU R24, R8, R24	// ...
    70	ADDVU R23, R24	// ...
    71	MOVV R5, 0(R4)	// store the four result words to z
    72	MOVV R6, 8(R4)
    73	MOVV R7, 16(R4)
    74	MOVV R8, 24(R4)
    75	ADDVU $32, R2	// advance all three cursors by four words
    76	ADDVU $32, R3
    77	ADDVU $32, R4
    78	SUBVU $1, R1
    79	BNE R1, loop4cont
    80loop4done:
    81	MOVV R24, c+72(FP)	// return the final carry
    82	RET
    83
    84// func subVV(z, x, y []Word) (c Word)
      //
      // subVV subtracts y from x word by word, lowest address first, storing
      // each difference (less the running borrow) into z and returning the
      // final borrow in c.
      // Only len(z) is read; the lengths of x and y are never loaded, so the
      // caller presumably guarantees they hold at least len(z) words (TODO confirm).
      //
      // Register use:
      //	R1         = len(z)/4, trip count of the 4X loop
      //	R2, R3, R4 = cursors into x, y, z
      //	R5         = len(z)%4, trip count of the 1X loop (reused for data in the 4X loop)
      //	R23        = borrow produced when the incoming borrow is subtracted
      //	R24        = running borrow between words
    85TEXT ·subVV(SB), NOSPLIT, $0
    86	MOVV z_len+8(FP), R1	// R1 = len(z)
    87	MOVV x_base+24(FP), R2	// R2 = &x[0]
    88	MOVV y_base+48(FP), R3	// R3 = &y[0]
    89	MOVV z_base+0(FP), R4	// R4 = &z[0]
    90	// compute unrolled loop lengths
    91	AND $3, R1, R5	// R5 = len(z) & 3: words handled one at a time
    92	SRLV $2, R1	// R1 = len(z) >> 2: groups of four words
    93	XOR R24, R24	// clear carry
    94loop1:
    95	BEQ R5, loop1done	// no leftover words: skip the 1X loop
    96loop1cont:
    97	// unroll 1X
    98	MOVV 0(R2), R6	// R6 = next word of x
    99	MOVV 0(R3), R7	// R7 = next word of y
   100	SGTU R24, R6, R23	// SBCS R7, R6, R6
   101	SUBVU R24, R6	// ... x -= incoming borrow (R23 above = 1 if this wraps)
   102	SGTU R7, R6, R24	// ... R24 = 1 if y > remaining value, i.e. the next subtract wraps
   103	SUBVU R7, R6	// ... x -= y
   104	ADDVU R23, R24	// ... outgoing borrow = sum of the two wrap flags
   105	MOVV R6, 0(R4)	// store result word to z
   106	ADDVU $8, R2	// advance all three cursors by one 8-byte word
   107	ADDVU $8, R3
   108	ADDVU $8, R4
   109	SUBVU $1, R5
   110	BNE R5, loop1cont
   111loop1done:
   112loop4:
   113	BEQ R1, loop4done	// no groups of four: go straight to the return
   114loop4cont:
   115	// unroll 4X
      	// Load four words of x (R5-R8) and four of y (R9-R12), then run the
      	// same subtract-with-borrow sequence as the 1X loop once per word,
      	// with R24 threading the borrow from each word to the next.
   116	MOVV 0(R2), R5
   117	MOVV 8(R2), R6
   118	MOVV 16(R2), R7
   119	MOVV 24(R2), R8
   120	MOVV 0(R3), R9
   121	MOVV 8(R3), R10
   122	MOVV 16(R3), R11
   123	MOVV 24(R3), R12
   124	SGTU R24, R5, R23	// SBCS R9, R5, R5
   125	SUBVU R24, R5	// ...
   126	SGTU R9, R5, R24	// ...
   127	SUBVU R9, R5	// ...
   128	ADDVU R23, R24	// ...
   129	SGTU R24, R6, R23	// SBCS R10, R6, R6
   130	SUBVU R24, R6	// ...
   131	SGTU R10, R6, R24	// ...
   132	SUBVU R10, R6	// ...
   133	ADDVU R23, R24	// ...
   134	SGTU R24, R7, R23	// SBCS R11, R7, R7
   135	SUBVU R24, R7	// ...
   136	SGTU R11, R7, R24	// ...
   137	SUBVU R11, R7	// ...
   138	ADDVU R23, R24	// ...
   139	SGTU R24, R8, R23	// SBCS R12, R8, R8
   140	SUBVU R24, R8	// ...
   141	SGTU R12, R8, R24	// ...
   142	SUBVU R12, R8	// ...
   143	ADDVU R23, R24	// ...
   144	MOVV R5, 0(R4)	// store the four result words to z
   145	MOVV R6, 8(R4)
   146	MOVV R7, 16(R4)
   147	MOVV R8, 24(R4)
   148	ADDVU $32, R2	// advance all three cursors by four words
   149	ADDVU $32, R3
   150	ADDVU $32, R4
   151	SUBVU $1, R1
   152	BNE R1, loop4cont
   153loop4done:
   154	MOVV R24, c+72(FP)	// return the final borrow
   155	RET
   156
   157// func lshVU(z, x []Word, s uint) (c Word)
      //
      // lshVU shifts the len(z)-word vector x left by s bits into z, walking
      // from the highest address down, and returns in c the bits shifted out
      // of the top word. When len(z) is zero it stores nothing and returns 0.
      // NOTE(review): the complementary shift count is computed as 64-s, so
      // this presumably requires 0 < s < 64 - confirm against the callers.
      //
      // Register use:
      //	R1     = len(z), later (len(z)-1)/4, trip count of the 4X loop
      //	R2     = s
      //	R3, R4 = cursors into x and z, starting one word past the end
      //	R5     = pending bits: previous (higher) word already shifted left by s
      //	R6     = 64 - s
      //	R7     = c, then (len(z)-1)%4 as the 1X trip count, then data in the 4X loop
   158TEXT ·lshVU(SB), NOSPLIT, $0
   159	MOVV z_len+8(FP), R1	// R1 = len(z)
   160	BEQ R1, ret0	// empty vector: return 0
   161	MOVV s+48(FP), R2	// R2 = s
   162	MOVV x_base+24(FP), R3	// R3 = &x[0]
   163	MOVV z_base+0(FP), R4	// R4 = &z[0]
   164	// run loop backward
   165	SLLV $3, R1, R5	// R5 = len(z) * 8 bytes
   166	ADDVU R5, R3	// R3 = &x[len(z)]
   167	SLLV $3, R1, R5
   168	ADDVU R5, R4	// R4 = &z[len(z)]
   169	// shift first word into carry
   170	MOVV -8(R3), R5	// R5 = highest word of x
   171	MOVV $64, R6
   172	SUBVU R2, R6	// R6 = 64 - s
   173	SRLV R6, R5, R7	// R7 = bits shifted out of the top word
   174	SLLV R2, R5	// R5 = top word << s, waiting for the low bits from the next word
   175	MOVV R7, c+56(FP)	// result is stored up front; R7 is reused below
   176	// shift remaining words
   177	SUBVU $1, R1	// one word already consumed
   178	// compute unrolled loop lengths
   179	AND $3, R1, R7	// R7 = remaining words & 3: handled one at a time
   180	SRLV $2, R1	// R1 = remaining words >> 2: groups of four
   181loop1:
   182	BEQ R7, loop1done
   183loop1cont:
   184	// unroll 1X
   185	MOVV -16(R3), R8	// R8 = next lower word of x
   186	SRLV R6, R8, R9	// R9 = its high bits, moved down to the low end
   187	OR R5, R9	// merge with the pending bits of the word above
   188	SLLV R2, R8, R5	// R5 = new pending bits
   189	MOVV R9, -8(R4)	// store the completed word to z
   190	ADDVU $-8, R3	// step both cursors down one word
   191	ADDVU $-8, R4
   192	SUBVU $1, R7
   193	BNE R7, loop1cont
   194loop1done:
   195loop4:
   196	BEQ R1, loop4done
   197loop4cont:
   198	// unroll 4X
      	// Same shift-and-merge step as the 1X loop, applied to four
      	// descending words; R5 carries the pending bits between steps and
      	// R7-R9 are overwritten with results once their input is consumed.
   199	MOVV -16(R3), R7
   200	MOVV -24(R3), R8
   201	MOVV -32(R3), R9
   202	MOVV -40(R3), R10
   203	SRLV R6, R7, R11
   204	OR R5, R11
   205	SLLV R2, R7, R5
   206	SRLV R6, R8, R7
   207	OR R5, R7
   208	SLLV R2, R8, R5
   209	SRLV R6, R9, R8
   210	OR R5, R8
   211	SLLV R2, R9, R5
   212	SRLV R6, R10, R9
   213	OR R5, R9
   214	SLLV R2, R10, R5
   215	MOVV R11, -8(R4)	// store the four completed words to z
   216	MOVV R7, -16(R4)
   217	MOVV R8, -24(R4)
   218	MOVV R9, -32(R4)
   219	ADDVU $-32, R3	// step both cursors down four words
   220	ADDVU $-32, R4
   221	SUBVU $1, R1
   222	BNE R1, loop4cont
   223loop4done:
   224	// store final shifted bits
   225	MOVV R5, -8(R4)	// lowest word of z: only the pending bits, low s bits are zero
   226	RET
   227ret0:
   228	MOVV R0, c+56(FP)	// R0 supplies the zero return value
   229	RET
   230
   231// func rshVU(z, x []Word, s uint) (c Word)
      //
      // rshVU shifts the len(z)-word vector x right by s bits into z, walking
      // from the lowest address up, and returns in c the bits shifted out of
      // the bottom word (positioned at the high end of c). When len(z) is zero
      // it stores nothing and returns 0.
      // NOTE(review): the complementary shift count is computed as 64-s, so
      // this presumably requires 0 < s < 64 - confirm against the callers.
      //
      // Register use:
      //	R1     = len(z), later (len(z)-1)/4, trip count of the 4X loop
      //	R2     = s
      //	R3, R4 = cursors into x and z
      //	R5     = pending bits: previous (lower) word already shifted right by s
      //	R6     = 64 - s
      //	R7     = c, then (len(z)-1)%4 as the 1X trip count, then data in the 4X loop
   232TEXT ·rshVU(SB), NOSPLIT, $0
   233	MOVV z_len+8(FP), R1	// R1 = len(z)
   234	BEQ R1, ret0	// empty vector: return 0
   235	MOVV s+48(FP), R2	// R2 = s
   236	MOVV x_base+24(FP), R3	// R3 = &x[0]
   237	MOVV z_base+0(FP), R4	// R4 = &z[0]
   238	// shift first word into carry
   239	MOVV 0(R3), R5	// R5 = lowest word of x
   240	MOVV $64, R6
   241	SUBVU R2, R6	// R6 = 64 - s
   242	SLLV R6, R5, R7	// R7 = bits shifted out of the bottom word
   243	SRLV R2, R5	// R5 = bottom word >> s, waiting for the high bits from the next word
   244	MOVV R7, c+56(FP)	// result is stored up front; R7 is reused below
   245	// shift remaining words
   246	SUBVU $1, R1	// one word already consumed
   247	// compute unrolled loop lengths
   248	AND $3, R1, R7	// R7 = remaining words & 3: handled one at a time
   249	SRLV $2, R1	// R1 = remaining words >> 2: groups of four
   250loop1:
   251	BEQ R7, loop1done
   252loop1cont:
   253	// unroll 1X
   254	MOVV 8(R3), R8	// R8 = next higher word of x
   255	SLLV R6, R8, R9	// R9 = its low bits, moved up to the high end
   256	OR R5, R9	// merge with the pending bits of the word below
   257	SRLV R2, R8, R5	// R5 = new pending bits
   258	MOVV R9, 0(R4)	// store the completed word to z
   259	ADDVU $8, R3	// step both cursors up one word
   260	ADDVU $8, R4
   261	SUBVU $1, R7
   262	BNE R7, loop1cont
   263loop1done:
   264loop4:
   265	BEQ R1, loop4done
   266loop4cont:
   267	// unroll 4X
      	// Same shift-and-merge step as the 1X loop, applied to four
      	// ascending words; R5 carries the pending bits between steps and
      	// R7-R9 are overwritten with results once their input is consumed.
   268	MOVV 8(R3), R7
   269	MOVV 16(R3), R8
   270	MOVV 24(R3), R9
   271	MOVV 32(R3), R10
   272	SLLV R6, R7, R11
   273	OR R5, R11
   274	SRLV R2, R7, R5
   275	SLLV R6, R8, R7
   276	OR R5, R7
   277	SRLV R2, R8, R5
   278	SLLV R6, R9, R8
   279	OR R5, R8
   280	SRLV R2, R9, R5
   281	SLLV R6, R10, R9
   282	OR R5, R9
   283	SRLV R2, R10, R5
   284	MOVV R11, 0(R4)	// store the four completed words to z
   285	MOVV R7, 8(R4)
   286	MOVV R8, 16(R4)
   287	MOVV R9, 24(R4)
   288	ADDVU $32, R3	// step both cursors up four words
   289	ADDVU $32, R4
   290	SUBVU $1, R1
   291	BNE R1, loop4cont
   292loop4done:
   293	// store final shifted bits
   294	MOVV R5, 0(R4)	// highest word of z: only the pending bits, high s bits are zero
   295	RET
   296ret0:
   297	MOVV R0, c+56(FP)	// R0 supplies the zero return value
   298	RET
   299
   300// func mulAddVWW(z, x []Word, m, a Word) (c Word)
      //
      // mulAddVWW multiplies each word of x by m, adds the running carry word
      // (initially a), stores the low 64 bits of the result into z, and keeps
      // the high 64 bits as the carry into the next word. The last carry word
      // is returned in c.
      // Only len(z) is read; len(x) is never loaded, so the caller presumably
      // guarantees x holds at least len(z) words (TODO confirm).
      //
      // Register use:
      //	R1     = m
      //	R2     = running carry word (starts as a, ends as c)
      //	R3     = len(z), later len(z)/4, trip count of the 4X loop
      //	R4, R5 = cursors into x and z
      //	R6     = len(z)%4, trip count of the 1X loop (reused for data in the 4X loop)
      //	R24    = one-bit carry out of the low-half addition
   301TEXT ·mulAddVWW(SB), NOSPLIT, $0
   302	MOVV m+48(FP), R1	// R1 = m
   303	MOVV a+56(FP), R2	// R2 = a, the initial carry word
   304	MOVV z_len+8(FP), R3	// R3 = len(z)
   305	MOVV x_base+24(FP), R4	// R4 = &x[0]
   306	MOVV z_base+0(FP), R5	// R5 = &z[0]
   307	// compute unrolled loop lengths
   308	AND $3, R3, R6	// R6 = len(z) & 3: words handled one at a time
   309	SRLV $2, R3	// R3 = len(z) >> 2: groups of four words
   310loop1:
   311	BEQ R6, loop1done
   312loop1cont:
   313	// unroll 1X
   314	MOVV 0(R4), R7	// R7 = next word of x
   315	// synthetic carry, one column at a time
   316	MULVU R1, R7	// HI:LO = m * x word (unsigned 128-bit product)
   317	MOVV LO, R8	// R8 = low half of the product
   318	MOVV HI, R9	// R9 = high half of the product
   319	ADDVU R2, R8, R7	// ADDS R2, R8, R7 (cr=R24)
   320	SGTU R2, R7, R24	// ... R24 = 1 if low half + carry word wrapped
   321	ADDVU R24, R9, R2	// ADC $0, R9, R2
   322	MOVV R7, 0(R5)	// store the low result word to z
   323	ADDVU $8, R4	// advance both cursors by one 8-byte word
   324	ADDVU $8, R5
   325	SUBVU $1, R6
   326	BNE R6, loop1cont
   327loop1done:
   328loop4:
   329	BEQ R3, loop4done
   330loop4cont:
   331	// unroll 4X
      	// Load four words of x into R6-R9, then repeat the 1X multiply-add
      	// step for each; R10/R11 hold the LO/HI halves of each product and
      	// R2 threads the carry word from one column to the next.
   332	MOVV 0(R4), R6
   333	MOVV 8(R4), R7
   334	MOVV 16(R4), R8
   335	MOVV 24(R4), R9
   336	// synthetic carry, one column at a time
   337	MULVU R1, R6
   338	MOVV LO, R10
   339	MOVV HI, R11
   340	ADDVU R2, R10, R6	// ADDS R2, R10, R6 (cr=R24)
   341	SGTU R2, R6, R24	// ...
   342	ADDVU R24, R11, R2	// ADC $0, R11, R2
   343	MULVU R1, R7
   344	MOVV LO, R10
   345	MOVV HI, R11
   346	ADDVU R2, R10, R7	// ADDS R2, R10, R7 (cr=R24)
   347	SGTU R2, R7, R24	// ...
   348	ADDVU R24, R11, R2	// ADC $0, R11, R2
   349	MULVU R1, R8
   350	MOVV LO, R10
   351	MOVV HI, R11
   352	ADDVU R2, R10, R8	// ADDS R2, R10, R8 (cr=R24)
   353	SGTU R2, R8, R24	// ...
   354	ADDVU R24, R11, R2	// ADC $0, R11, R2
   355	MULVU R1, R9
   356	MOVV LO, R10
   357	MOVV HI, R11
   358	ADDVU R2, R10, R9	// ADDS R2, R10, R9 (cr=R24)
   359	SGTU R2, R9, R24	// ...
   360	ADDVU R24, R11, R2	// ADC $0, R11, R2
   361	MOVV R6, 0(R5)	// store the four result words to z
   362	MOVV R7, 8(R5)
   363	MOVV R8, 16(R5)
   364	MOVV R9, 24(R5)
   365	ADDVU $32, R4	// advance both cursors by four words
   366	ADDVU $32, R5
   367	SUBVU $1, R3
   368	BNE R3, loop4cont
   369loop4done:
   370	MOVV R2, c+64(FP)	// return the final carry word
   371	RET
   372
   373// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
      //
      // addMulVVWW computes, for each word position, x word + (y word * m) +
      // running carry word (initially a). The low 64 bits are stored into z and
      // the high 64 bits become the carry into the next position. The last
      // carry word is returned in c.
      // Only len(z) is read; the lengths of x and y are never loaded, so the
      // caller presumably guarantees they hold at least len(z) words (TODO confirm).
      //
      // Register use:
      //	R1         = m
      //	R2         = running carry word (starts as a, ends as c)
      //	R3         = len(z), later len(z)/4, trip count of the 4X loop
      //	R4, R5, R6 = cursors into x, y, z
      //	R7         = len(z)%4, trip count of the 1X loop (reused for data in the 4X loop)
      //	R24        = one-bit carry out of each low-half addition
   374TEXT ·addMulVVWW(SB), NOSPLIT, $0
   375	MOVV m+72(FP), R1	// R1 = m
   376	MOVV a+80(FP), R2	// R2 = a, the initial carry word
   377	MOVV z_len+8(FP), R3	// R3 = len(z)
   378	MOVV x_base+24(FP), R4	// R4 = &x[0]
   379	MOVV y_base+48(FP), R5	// R5 = &y[0]
   380	MOVV z_base+0(FP), R6	// R6 = &z[0]
   381	// compute unrolled loop lengths
   382	AND $3, R3, R7	// R7 = len(z) & 3: words handled one at a time
   383	SRLV $2, R3	// R3 = len(z) >> 2: groups of four words
   384loop1:
   385	BEQ R7, loop1done
   386loop1cont:
   387	// unroll 1X
   388	MOVV 0(R4), R8	// R8 = next word of x
   389	MOVV 0(R5), R9	// R9 = next word of y
   390	// synthetic carry, one column at a time
   391	MULVU R1, R9	// HI:LO = m * y word (unsigned 128-bit product)
   392	MOVV LO, R10	// R10 = low half of the product
   393	MOVV HI, R11	// R11 = high half of the product
   394	ADDVU R8, R10	// ADDS R8, R10, R10 (cr=R24)
   395	SGTU R8, R10, R24	// ... R24 = 1 if low half + x word wrapped
   396	ADDVU R24, R11	// ADC $0, R11, R11
   397	ADDVU R2, R10, R9	// ADDS R2, R10, R9 (cr=R24)
   398	SGTU R2, R9, R24	// ... R24 = 1 if adding the carry word wrapped
   399	ADDVU R24, R11, R2	// ADC $0, R11, R2
   400	MOVV R9, 0(R6)	// store the low result word to z
   401	ADDVU $8, R4	// advance all three cursors by one 8-byte word
   402	ADDVU $8, R5
   403	ADDVU $8, R6
   404	SUBVU $1, R7
   405	BNE R7, loop1cont
   406loop1done:
   407loop4:
   408	BEQ R3, loop4done
   409loop4cont:
   410	// unroll 4X
      	// Load four words of x (R7-R10) and four of y (R11-R14), then repeat
      	// the 1X multiply-add step for each pair; R15/R16 hold the LO/HI
      	// halves of each product and R2 threads the carry word between columns.
   411	MOVV 0(R4), R7
   412	MOVV 8(R4), R8
   413	MOVV 16(R4), R9
   414	MOVV 24(R4), R10
   415	MOVV 0(R5), R11
   416	MOVV 8(R5), R12
   417	MOVV 16(R5), R13
   418	MOVV 24(R5), R14
   419	// synthetic carry, one column at a time
   420	MULVU R1, R11
   421	MOVV LO, R15
   422	MOVV HI, R16
   423	ADDVU R7, R15	// ADDS R7, R15, R15 (cr=R24)
   424	SGTU R7, R15, R24	// ...
   425	ADDVU R24, R16	// ADC $0, R16, R16
   426	ADDVU R2, R15, R11	// ADDS R2, R15, R11 (cr=R24)
   427	SGTU R2, R11, R24	// ...
   428	ADDVU R24, R16, R2	// ADC $0, R16, R2
   429	MULVU R1, R12
   430	MOVV LO, R15
   431	MOVV HI, R16
   432	ADDVU R8, R15	// ADDS R8, R15, R15 (cr=R24)
   433	SGTU R8, R15, R24	// ...
   434	ADDVU R24, R16	// ADC $0, R16, R16
   435	ADDVU R2, R15, R12	// ADDS R2, R15, R12 (cr=R24)
   436	SGTU R2, R12, R24	// ...
   437	ADDVU R24, R16, R2	// ADC $0, R16, R2
   438	MULVU R1, R13
   439	MOVV LO, R15
   440	MOVV HI, R16
   441	ADDVU R9, R15	// ADDS R9, R15, R15 (cr=R24)
   442	SGTU R9, R15, R24	// ...
   443	ADDVU R24, R16	// ADC $0, R16, R16
   444	ADDVU R2, R15, R13	// ADDS R2, R15, R13 (cr=R24)
   445	SGTU R2, R13, R24	// ...
   446	ADDVU R24, R16, R2	// ADC $0, R16, R2
   447	MULVU R1, R14
   448	MOVV LO, R15
   449	MOVV HI, R16
   450	ADDVU R10, R15	// ADDS R10, R15, R15 (cr=R24)
   451	SGTU R10, R15, R24	// ...
   452	ADDVU R24, R16	// ADC $0, R16, R16
   453	ADDVU R2, R15, R14	// ADDS R2, R15, R14 (cr=R24)
   454	SGTU R2, R14, R24	// ...
   455	ADDVU R24, R16, R2	// ADC $0, R16, R2
   456	MOVV R11, 0(R6)	// store the four result words to z
   457	MOVV R12, 8(R6)
   458	MOVV R13, 16(R6)
   459	MOVV R14, 24(R6)
   460	ADDVU $32, R4	// advance all three cursors by four words
   461	ADDVU $32, R5
   462	ADDVU $32, R6
   463	SUBVU $1, R3
   464	BNE R3, loop4cont
   465loop4done:
   466	MOVV R2, c+88(FP)	// return the final carry word
   467	RET

View as plain text