...
Run Format

Text file src/math/big/arith_mipsx.s

Documentation: math/big

     1// Copyright 2025 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
     6
     7//go:build !math_big_pure_go && (mips || mipsle)
     8
     9#include "textflag.h"
    10
    11// func addVV(z, x, y []Word) (c Word)
      //
      // addVV adds x and y word by word, lowest index first, storing
      // x[i] + y[i] + carry into z[i] for each of the len(z) words, and
      // returns the final carry in c. Words are 4 bytes wide.
      // Only z's length is read; x and y are presumably at least as long
      // (verify against callers).
      //
      // Register use:
      //	R1         count of 4-word iterations, len(z) >> 2
      //	R5         count of 1-word iterations, len(z) & 3 (reused for data in the 4X loop)
      //	R2, R3, R4 cursors into x, y, z
      //	R24        running carry
      //	R23        scratch carry bit
    12TEXT ·addVV(SB), NOSPLIT, $0
    13	MOVW z_len+4(FP), R1
    14	MOVW x_base+12(FP), R2
    15	MOVW y_base+24(FP), R3
    16	MOVW z_base+0(FP), R4
    17	// compute unrolled loop lengths
    18	AND $3, R1, R5
    19	SRL $2, R1
    20	XOR R24, R24	// clear carry
    21loop1:
      	// process the len(z) & 3 leftover words first, one per iteration
    22	BEQ R5, loop1done
    23loop1cont:
    24	// unroll 1X
    25	MOVW 0(R2), R6
    26	MOVW 0(R3), R7
      	// The carry lives in R24 rather than in a flag. After each ADDU the
      	// sum wrapped exactly when the value just added is greater than the
      	// result, which SGTU tests. The carry out of x+y (R23) and the carry
      	// out of adding the incoming carry are then combined into R24.
    27	ADDU R7, R6	// ADCS R7, R6, R6 (cr=R24)
    28	SGTU R7, R6, R23	// ...
    29	ADDU R24, R6	// ...
    30	SGTU R24, R6, R24	// ...
    31	ADDU R23, R24	// ...
    32	MOVW R6, 0(R4)
      	// advance all three cursors by one word
    33	ADDU $4, R2
    34	ADDU $4, R3
    35	ADDU $4, R4
    36	SUBU $1, R5
    37	BNE R5, loop1cont
    38loop1done:
    39loop4:
      	// process the remaining words four at a time
    40	BEQ R1, loop4done
    41loop4cont:
    42	// unroll 4X
      	// load four words of x into R5-R8 and four words of y into R9-R12
    43	MOVW 0(R2), R5
    44	MOVW 4(R2), R6
    45	MOVW 8(R2), R7
    46	MOVW 12(R2), R8
    47	MOVW 0(R3), R9
    48	MOVW 4(R3), R10
    49	MOVW 8(R3), R11
    50	MOVW 12(R3), R12
      	// four copies of the 1X add-with-carry sequence, chained through R24
    51	ADDU R9, R5	// ADCS R9, R5, R5 (cr=R24)
    52	SGTU R9, R5, R23	// ...
    53	ADDU R24, R5	// ...
    54	SGTU R24, R5, R24	// ...
    55	ADDU R23, R24	// ...
    56	ADDU R10, R6	// ADCS R10, R6, R6 (cr=R24)
    57	SGTU R10, R6, R23	// ...
    58	ADDU R24, R6	// ...
    59	SGTU R24, R6, R24	// ...
    60	ADDU R23, R24	// ...
    61	ADDU R11, R7	// ADCS R11, R7, R7 (cr=R24)
    62	SGTU R11, R7, R23	// ...
    63	ADDU R24, R7	// ...
    64	SGTU R24, R7, R24	// ...
    65	ADDU R23, R24	// ...
    66	ADDU R12, R8	// ADCS R12, R8, R8 (cr=R24)
    67	SGTU R12, R8, R23	// ...
    68	ADDU R24, R8	// ...
    69	SGTU R24, R8, R24	// ...
    70	ADDU R23, R24	// ...
      	// store the four sums to z
    71	MOVW R5, 0(R4)
    72	MOVW R6, 4(R4)
    73	MOVW R7, 8(R4)
    74	MOVW R8, 12(R4)
      	// advance all three cursors by four words
    75	ADDU $16, R2
    76	ADDU $16, R3
    77	ADDU $16, R4
    78	SUBU $1, R1
    79	BNE R1, loop4cont
    80loop4done:
      	// return the final carry
    81	MOVW R24, c+36(FP)
    82	RET
    83
    84// func subVV(z, x, y []Word) (c Word)
      //
      // subVV subtracts y from x word by word, lowest index first, storing
      // x[i] - y[i] - borrow into z[i] for each of the len(z) words, and
      // returns the final borrow in c. Words are 4 bytes wide.
      // Only z's length is read; x and y are presumably at least as long
      // (verify against callers).
      //
      // Register use:
      //	R1         count of 4-word iterations, len(z) >> 2
      //	R5         count of 1-word iterations, len(z) & 3 (reused for data in the 4X loop)
      //	R2, R3, R4 cursors into x, y, z
      //	R24        running borrow
      //	R23        scratch borrow bit
    85TEXT ·subVV(SB), NOSPLIT, $0
    86	MOVW z_len+4(FP), R1
    87	MOVW x_base+12(FP), R2
    88	MOVW y_base+24(FP), R3
    89	MOVW z_base+0(FP), R4
    90	// compute unrolled loop lengths
    91	AND $3, R1, R5
    92	SRL $2, R1
    93	XOR R24, R24	// clear carry
    94loop1:
      	// process the len(z) & 3 leftover words first, one per iteration
    95	BEQ R5, loop1done
    96loop1cont:
    97	// unroll 1X
    98	MOVW 0(R2), R6
    99	MOVW 0(R3), R7
      	// The borrow lives in R24 rather than in a flag. Each subtraction
      	// underflows exactly when the value being subtracted is greater than
      	// the current value, so SGTU is evaluated BEFORE the matching SUBU:
      	// first for the incoming borrow (into R23), then for y (into R24).
      	// The two borrow bits are then combined into R24.
   100	SGTU R24, R6, R23	// SBCS R7, R6, R6
   101	SUBU R24, R6	// ...
   102	SGTU R7, R6, R24	// ...
   103	SUBU R7, R6	// ...
   104	ADDU R23, R24	// ...
   105	MOVW R6, 0(R4)
      	// advance all three cursors by one word
   106	ADDU $4, R2
   107	ADDU $4, R3
   108	ADDU $4, R4
   109	SUBU $1, R5
   110	BNE R5, loop1cont
   111loop1done:
   112loop4:
      	// process the remaining words four at a time
   113	BEQ R1, loop4done
   114loop4cont:
   115	// unroll 4X
      	// load four words of x into R5-R8 and four words of y into R9-R12
   116	MOVW 0(R2), R5
   117	MOVW 4(R2), R6
   118	MOVW 8(R2), R7
   119	MOVW 12(R2), R8
   120	MOVW 0(R3), R9
   121	MOVW 4(R3), R10
   122	MOVW 8(R3), R11
   123	MOVW 12(R3), R12
      	// four copies of the 1X subtract-with-borrow sequence, chained through R24
   124	SGTU R24, R5, R23	// SBCS R9, R5, R5
   125	SUBU R24, R5	// ...
   126	SGTU R9, R5, R24	// ...
   127	SUBU R9, R5	// ...
   128	ADDU R23, R24	// ...
   129	SGTU R24, R6, R23	// SBCS R10, R6, R6
   130	SUBU R24, R6	// ...
   131	SGTU R10, R6, R24	// ...
   132	SUBU R10, R6	// ...
   133	ADDU R23, R24	// ...
   134	SGTU R24, R7, R23	// SBCS R11, R7, R7
   135	SUBU R24, R7	// ...
   136	SGTU R11, R7, R24	// ...
   137	SUBU R11, R7	// ...
   138	ADDU R23, R24	// ...
   139	SGTU R24, R8, R23	// SBCS R12, R8, R8
   140	SUBU R24, R8	// ...
   141	SGTU R12, R8, R24	// ...
   142	SUBU R12, R8	// ...
   143	ADDU R23, R24	// ...
      	// store the four differences to z
   144	MOVW R5, 0(R4)
   145	MOVW R6, 4(R4)
   146	MOVW R7, 8(R4)
   147	MOVW R8, 12(R4)
      	// advance all three cursors by four words
   148	ADDU $16, R2
   149	ADDU $16, R3
   150	ADDU $16, R4
   151	SUBU $1, R1
   152	BNE R1, loop4cont
   153loop4done:
      	// return the final borrow
   154	MOVW R24, c+36(FP)
   155	RET
   156
   157// func lshVU(z, x []Word, s uint) (c Word)
      //
      // lshVU stores x shifted left by s bits into z, working from the
      // highest word down to the lowest, and returns in c the bits shifted
      // out of the top word, x[len(z)-1] >> (32-s). When len(z) is 0 it
      // returns c = 0 without reading x or writing z.
      // NOTE(review): the code uses 32-s as a shift count, so it appears to
      // assume 0 < s < 32 -- confirm against callers.
      //
      // Register use:
      //	R1     len(z), then count of 4-word iterations
      //	R2     s
      //	R6     32 - s
      //	R3, R4 cursors into x and z, starting one word past the end
      //	R5     previous (higher) word << s, waiting for the low bits of its z word
      //	R7     count of 1-word iterations (reused for data in the 4X loop)
   158TEXT ·lshVU(SB), NOSPLIT, $0
   159	MOVW z_len+4(FP), R1
   160	BEQ R1, ret0
   161	MOVW s+24(FP), R2
   162	MOVW x_base+12(FP), R3
   163	MOVW z_base+0(FP), R4
   164	// run loop backward
      	// point R3 and R4 just past the last word of x and z (base + 4*len)
   165	SLL $2, R1, R5
   166	ADDU R5, R3
   167	SLL $2, R1, R5
   168	ADDU R5, R4
   169	// shift first word into carry
      	// R7 = top word >> (32-s) is the result c; R5 = top word << s is kept
      	// for the first store
   170	MOVW -4(R3), R5
   171	MOVW $32, R6
   172	SUBU R2, R6
   173	SRL R6, R5, R7
   174	SLL R2, R5
   175	MOVW R7, c+28(FP)
   176	// shift remaining words
   177	SUBU $1, R1
   178	// compute unrolled loop lengths
   179	AND $3, R1, R7
   180	SRL $2, R1
   181loop1:
   182	BEQ R7, loop1done
   183loop1cont:
   184	// unroll 1X
      	// load the next lower word, OR its top bits under the pending R5, and
      	// store the result into the z slot of the previous (higher) word;
      	// the word's own low bits << s become the new pending R5
   185	MOVW -8(R3), R8
   186	SRL R6, R8, R9
   187	OR R5, R9
   188	SLL R2, R8, R5
   189	MOVW R9, -4(R4)
   190	ADDU $-4, R3
   191	ADDU $-4, R4
   192	SUBU $1, R7
   193	BNE R7, loop1cont
   194loop1done:
   195loop4:
   196	BEQ R1, loop4done
   197loop4cont:
   198	// unroll 4X
      	// load the next four lower words of x into R7-R10
   199	MOVW -8(R3), R7
   200	MOVW -12(R3), R8
   201	MOVW -16(R3), R9
   202	MOVW -20(R3), R10
      	// same step as the 1X loop, four times; each result is written to a
      	// register whose input word has already been consumed (R11, R7, R8, R9)
   203	SRL R6, R7, R11
   204	OR R5, R11
   205	SLL R2, R7, R5
   206	SRL R6, R8, R7
   207	OR R5, R7
   208	SLL R2, R8, R5
   209	SRL R6, R9, R8
   210	OR R5, R8
   211	SLL R2, R9, R5
   212	SRL R6, R10, R9
   213	OR R5, R9
   214	SLL R2, R10, R5
      	// store the four results, highest word first
   215	MOVW R11, -4(R4)
   216	MOVW R7, -8(R4)
   217	MOVW R8, -12(R4)
   218	MOVW R9, -16(R4)
   219	ADDU $-16, R3
   220	ADDU $-16, R4
   221	SUBU $1, R1
   222	BNE R1, loop4cont
   223loop4done:
   224	// store final shifted bits
      	// R4 now points one word past z[0], so this writes the lowest word of z
   225	MOVW R5, -4(R4)
   226	RET
   227ret0:
      	// empty z: the result is zero (R0 is the zero register)
   228	MOVW R0, c+28(FP)
   229	RET
   230
   231// func rshVU(z, x []Word, s uint) (c Word)
      //
      // rshVU stores x shifted right by s bits into z, working from the
      // lowest word up to the highest, and returns in c the bits shifted out
      // of the bottom word, x[0] << (32-s). When len(z) is 0 it returns
      // c = 0 without reading x or writing z.
      // NOTE(review): the code uses 32-s as a shift count, so it appears to
      // assume 0 < s < 32 -- confirm against callers.
      //
      // Register use:
      //	R1     len(z), then count of 4-word iterations
      //	R2     s
      //	R6     32 - s
      //	R3, R4 cursors into x and z
      //	R5     previous (lower) word >> s, waiting for the high bits of its z word
      //	R7     count of 1-word iterations (reused for data in the 4X loop)
   232TEXT ·rshVU(SB), NOSPLIT, $0
   233	MOVW z_len+4(FP), R1
   234	BEQ R1, ret0
   235	MOVW s+24(FP), R2
   236	MOVW x_base+12(FP), R3
   237	MOVW z_base+0(FP), R4
   238	// shift first word into carry
      	// R7 = x[0] << (32-s) is the result c; R5 = x[0] >> s is kept for the
      	// first store
   239	MOVW 0(R3), R5
   240	MOVW $32, R6
   241	SUBU R2, R6
   242	SLL R6, R5, R7
   243	SRL R2, R5
   244	MOVW R7, c+28(FP)
   245	// shift remaining words
   246	SUBU $1, R1
   247	// compute unrolled loop lengths
   248	AND $3, R1, R7
   249	SRL $2, R1
   250loop1:
   251	BEQ R7, loop1done
   252loop1cont:
   253	// unroll 1X
      	// load the next higher word, OR its low bits (moved to the top) over
      	// the pending R5, and store the result into the z slot of the previous
      	// (lower) word; the word's own bits >> s become the new pending R5
   254	MOVW 4(R3), R8
   255	SLL R6, R8, R9
   256	OR R5, R9
   257	SRL R2, R8, R5
   258	MOVW R9, 0(R4)
   259	ADDU $4, R3
   260	ADDU $4, R4
   261	SUBU $1, R7
   262	BNE R7, loop1cont
   263loop1done:
   264loop4:
   265	BEQ R1, loop4done
   266loop4cont:
   267	// unroll 4X
      	// load the next four higher words of x into R7-R10
   268	MOVW 4(R3), R7
   269	MOVW 8(R3), R8
   270	MOVW 12(R3), R9
   271	MOVW 16(R3), R10
      	// same step as the 1X loop, four times; each result is written to a
      	// register whose input word has already been consumed (R11, R7, R8, R9)
   272	SLL R6, R7, R11
   273	OR R5, R11
   274	SRL R2, R7, R5
   275	SLL R6, R8, R7
   276	OR R5, R7
   277	SRL R2, R8, R5
   278	SLL R6, R9, R8
   279	OR R5, R8
   280	SRL R2, R9, R5
   281	SLL R6, R10, R9
   282	OR R5, R9
   283	SRL R2, R10, R5
      	// store the four results, lowest word first
   284	MOVW R11, 0(R4)
   285	MOVW R7, 4(R4)
   286	MOVW R8, 8(R4)
   287	MOVW R9, 12(R4)
   288	ADDU $16, R3
   289	ADDU $16, R4
   290	SUBU $1, R1
   291	BNE R1, loop4cont
   292loop4done:
   293	// store final shifted bits
      	// R4 now points at the last word of z, which receives the top word >> s
   294	MOVW R5, 0(R4)
   295	RET
   296ret0:
      	// empty z: the result is zero (R0 is the zero register)
   297	MOVW R0, c+28(FP)
   298	RET
   299
   300// func mulAddVWW(z, x []Word, m, a Word) (c Word)
      //
      // mulAddVWW multiplies each word of x by m, lowest index first, adding
      // a running carry that starts as a: z[i] receives the low word of
      // x[i]*m + carry and the high word becomes the next carry. The final
      // carry is returned in c. The loops run over len(z) words; x is
      // presumably at least as long (verify against callers).
      //
      // Register use:
      //	R1     m
      //	R2     running carry word (initially a)
      //	R3     count of 4-word iterations, len(z) >> 2
      //	R6     count of 1-word iterations, len(z) & 3 (reused for data in the 4X loop)
      //	R4, R5 cursors into x and z
      //	R24    carry bit out of the low-word addition
   301TEXT ·mulAddVWW(SB), NOSPLIT, $0
   302	MOVW m+24(FP), R1
   303	MOVW a+28(FP), R2
   304	MOVW z_len+4(FP), R3
   305	MOVW x_base+12(FP), R4
   306	MOVW z_base+0(FP), R5
   307	// compute unrolled loop lengths
   308	AND $3, R3, R6
   309	SRL $2, R3
   310loop1:
      	// process the len(z) & 3 leftover words first, one per iteration
   311	BEQ R6, loop1done
   312loop1cont:
   313	// unroll 1X
   314	MOVW 0(R4), R7
   315	// synthetic carry, one column at a time
      	// HI:LO = m * x[i]. The low half plus the incoming carry gives the
      	// output word; the addition wrapped exactly when the carry just added
      	// is greater than the sum (SGTU). That bit plus the high half becomes
      	// the next carry in R2.
   316	MULU R1, R7
   317	MOVW LO, R8
   318	MOVW HI, R9
   319	ADDU R2, R8, R7	// ADDS R2, R8, R7 (cr=R24)
   320	SGTU R2, R7, R24	// ...
   321	ADDU R24, R9, R2	// ADC $0, R9, R2
   322	MOVW R7, 0(R5)
   323	ADDU $4, R4
   324	ADDU $4, R5
   325	SUBU $1, R6
   326	BNE R6, loop1cont
   327loop1done:
   328loop4:
      	// process the remaining words four at a time
   329	BEQ R3, loop4done
   330loop4cont:
   331	// unroll 4X
      	// load four words of x into R6-R9; each is overwritten by its result
   332	MOVW 0(R4), R6
   333	MOVW 4(R4), R7
   334	MOVW 8(R4), R8
   335	MOVW 12(R4), R9
   336	// synthetic carry, one column at a time
      	// four copies of the 1X multiply-add step, chained through R2, with
      	// R10/R11 holding the low/high product halves
   337	MULU R1, R6
   338	MOVW LO, R10
   339	MOVW HI, R11
   340	ADDU R2, R10, R6	// ADDS R2, R10, R6 (cr=R24)
   341	SGTU R2, R6, R24	// ...
   342	ADDU R24, R11, R2	// ADC $0, R11, R2
   343	MULU R1, R7
   344	MOVW LO, R10
   345	MOVW HI, R11
   346	ADDU R2, R10, R7	// ADDS R2, R10, R7 (cr=R24)
   347	SGTU R2, R7, R24	// ...
   348	ADDU R24, R11, R2	// ADC $0, R11, R2
   349	MULU R1, R8
   350	MOVW LO, R10
   351	MOVW HI, R11
   352	ADDU R2, R10, R8	// ADDS R2, R10, R8 (cr=R24)
   353	SGTU R2, R8, R24	// ...
   354	ADDU R24, R11, R2	// ADC $0, R11, R2
   355	MULU R1, R9
   356	MOVW LO, R10
   357	MOVW HI, R11
   358	ADDU R2, R10, R9	// ADDS R2, R10, R9 (cr=R24)
   359	SGTU R2, R9, R24	// ...
   360	ADDU R24, R11, R2	// ADC $0, R11, R2
      	// store the four result words to z
   361	MOVW R6, 0(R5)
   362	MOVW R7, 4(R5)
   363	MOVW R8, 8(R5)
   364	MOVW R9, 12(R5)
   365	ADDU $16, R4
   366	ADDU $16, R5
   367	SUBU $1, R3
   368	BNE R3, loop4cont
   369loop4done:
      	// return the final carry word
   370	MOVW R2, c+32(FP)
   371	RET
   372
   373// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
      //
      // addMulVVWW computes x + y*m word by word, lowest index first, with a
      // running carry that starts as a: z[i] receives the low word of
      // x[i] + y[i]*m + carry and the high word becomes the next carry. The
      // final carry is returned in c. The loops run over len(z) words; x and
      // y are presumably at least as long (verify against callers).
      //
      // Register use:
      //	R1         m
      //	R2         running carry word (initially a)
      //	R3         count of 4-word iterations, len(z) >> 2
      //	R7         count of 1-word iterations, len(z) & 3 (reused for data in the 4X loop)
      //	R4, R5, R6 cursors into x, y, z
      //	R24        carry bit out of each low-word addition
   374TEXT ·addMulVVWW(SB), NOSPLIT, $0
   375	MOVW m+36(FP), R1
   376	MOVW a+40(FP), R2
   377	MOVW z_len+4(FP), R3
   378	MOVW x_base+12(FP), R4
   379	MOVW y_base+24(FP), R5
   380	MOVW z_base+0(FP), R6
   381	// compute unrolled loop lengths
   382	AND $3, R3, R7
   383	SRL $2, R3
   384loop1:
      	// process the len(z) & 3 leftover words first, one per iteration
   385	BEQ R7, loop1done
   386loop1cont:
   387	// unroll 1X
   388	MOVW 0(R4), R8
   389	MOVW 0(R5), R9
   390	// synthetic carry, one column at a time
      	// HI:LO = m * y[i]. First x[i] is added to the low half, then the
      	// incoming carry; after each addition SGTU detects a wrap (the value
      	// just added is greater than the sum) and that bit is added to the
      	// high half. The high half ends up in R2 as the next carry.
   391	MULU R1, R9
   392	MOVW LO, R10
   393	MOVW HI, R11
   394	ADDU R8, R10	// ADDS R8, R10, R10 (cr=R24)
   395	SGTU R8, R10, R24	// ...
   396	ADDU R24, R11	// ADC $0, R11, R11
   397	ADDU R2, R10, R9	// ADDS R2, R10, R9 (cr=R24)
   398	SGTU R2, R9, R24	// ...
   399	ADDU R24, R11, R2	// ADC $0, R11, R2
   400	MOVW R9, 0(R6)
   401	ADDU $4, R4
   402	ADDU $4, R5
   403	ADDU $4, R6
   404	SUBU $1, R7
   405	BNE R7, loop1cont
   406loop1done:
   407loop4:
      	// process the remaining words four at a time
   408	BEQ R3, loop4done
   409loop4cont:
   410	// unroll 4X
      	// load four words of x into R7-R10 and four words of y into R11-R14;
      	// each y register is overwritten by its result word
   411	MOVW 0(R4), R7
   412	MOVW 4(R4), R8
   413	MOVW 8(R4), R9
   414	MOVW 12(R4), R10
   415	MOVW 0(R5), R11
   416	MOVW 4(R5), R12
   417	MOVW 8(R5), R13
   418	MOVW 12(R5), R14
   419	// synthetic carry, one column at a time
      	// four copies of the 1X step, chained through R2, with R15/R16 holding
      	// the low/high product halves
   420	MULU R1, R11
   421	MOVW LO, R15
   422	MOVW HI, R16
   423	ADDU R7, R15	// ADDS R7, R15, R15 (cr=R24)
   424	SGTU R7, R15, R24	// ...
   425	ADDU R24, R16	// ADC $0, R16, R16
   426	ADDU R2, R15, R11	// ADDS R2, R15, R11 (cr=R24)
   427	SGTU R2, R11, R24	// ...
   428	ADDU R24, R16, R2	// ADC $0, R16, R2
   429	MULU R1, R12
   430	MOVW LO, R15
   431	MOVW HI, R16
   432	ADDU R8, R15	// ADDS R8, R15, R15 (cr=R24)
   433	SGTU R8, R15, R24	// ...
   434	ADDU R24, R16	// ADC $0, R16, R16
   435	ADDU R2, R15, R12	// ADDS R2, R15, R12 (cr=R24)
   436	SGTU R2, R12, R24	// ...
   437	ADDU R24, R16, R2	// ADC $0, R16, R2
   438	MULU R1, R13
   439	MOVW LO, R15
   440	MOVW HI, R16
   441	ADDU R9, R15	// ADDS R9, R15, R15 (cr=R24)
   442	SGTU R9, R15, R24	// ...
   443	ADDU R24, R16	// ADC $0, R16, R16
   444	ADDU R2, R15, R13	// ADDS R2, R15, R13 (cr=R24)
   445	SGTU R2, R13, R24	// ...
   446	ADDU R24, R16, R2	// ADC $0, R16, R2
   447	MULU R1, R14
   448	MOVW LO, R15
   449	MOVW HI, R16
   450	ADDU R10, R15	// ADDS R10, R15, R15 (cr=R24)
   451	SGTU R10, R15, R24	// ...
   452	ADDU R24, R16	// ADC $0, R16, R16
   453	ADDU R2, R15, R14	// ADDS R2, R15, R14 (cr=R24)
   454	SGTU R2, R14, R24	// ...
   455	ADDU R24, R16, R2	// ADC $0, R16, R2
      	// store the four result words to z
   456	MOVW R11, 0(R6)
   457	MOVW R12, 4(R6)
   458	MOVW R13, 8(R6)
   459	MOVW R14, 12(R6)
   460	ADDU $16, R4
   461	ADDU $16, R5
   462	ADDU $16, R6
   463	SUBU $1, R3
   464	BNE R3, loop4cont
   465loop4done:
      	// return the final carry word
   466	MOVW R2, c+44(FP)
   467	RET

View as plain text