...
Run Format

Text file src/internal/bytealg/indexbyte_loong64.s

Documentation: internal/bytealg

     1// Copyright 2022 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8// input:
     9//   R4 = b_base
    10//   R5 = b_len
    11//   R6 = b_cap (unused)
    12//   R7 = byte to find
    13TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
    14	AND	$0xff, R7
    15	JMP	indexbytebody<>(SB)
    16
    17// input:
    18//   R4 = s_base
    19//   R5 = s_len
    20//   R6 = byte to find
    21TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
    22	AND	$0xff, R6, R7	// byte to find
    23	JMP	indexbytebody<>(SB)
    24
    25// input:
    26//   R4: b_base
    27//   R5: len
    28//   R7: byte to find
    29TEXT indexbytebody<>(SB),NOSPLIT,$0
    30	BEQ	R5, notfound	// len == 0
    31
    32	MOVV	R4, R6		// store base for later
    33	ADDV	R4, R5, R8	// end
    34
    35	MOVV	$32, R9
    36	BGE	R5, R9, lasx
    37tail:
    38	MOVV	$8, R9
    39	BLT	R5, R9, lt_8
    40generic8_loop:
    41	MOVV	(R4), R10
    42
    43	AND	$0xff, R10, R11
    44	BEQ	R7, R11, found
    45
    46	BSTRPICKV	$15, R10, $8, R11
    47	BEQ	R7, R11, byte_1th
    48
    49	BSTRPICKV	$23, R10, $16, R11
    50	BEQ	R7, R11, byte_2th
    51
    52	BSTRPICKV	$31, R10, $24, R11
    53	BEQ	R7, R11, byte_3th
    54
    55	BSTRPICKV	$39, R10, $32, R11
    56	BEQ	R7, R11, byte_4th
    57
    58	BSTRPICKV	$47, R10, $40, R11
    59	BEQ	R7, R11, byte_5th
    60
    61	BSTRPICKV	$55, R10, $48, R11
    62	BEQ	R7, R11, byte_6th
    63
    64	BSTRPICKV	$63, R10, $56, R11
    65	BEQ	R7, R11, byte_7th
    66
    67	ADDV	$8, R4
    68	ADDV	$-8, R5
    69	BGE	R5, R9, generic8_loop
    70
    71lt_8:
    72	BEQ	R4, R8, notfound
    73	MOVBU	(R4), R10
    74	BEQ	R7, R10, found
    75	ADDV	$1, R4
    76	JMP	lt_8
    77
    78byte_1th:
    79	ADDV	$1, R4
    80	SUBV	R6, R4
    81	RET
    82
    83byte_2th:
    84	ADDV	$2, R4
    85	SUBV	R6, R4
    86	RET
    87
    88byte_3th:
    89	ADDV	$3, R4
    90	SUBV	R6, R4
    91	RET
    92
    93byte_4th:
    94	ADDV	$4, R4
    95	SUBV	R6, R4
    96	RET
    97
    98byte_5th:
    99	ADDV	$5, R4
   100	SUBV	R6, R4
   101	RET
   102
   103byte_6th:
   104	ADDV	$6, R4
   105	SUBV	R6, R4
   106	RET
   107
   108byte_7th:
   109	ADDV	$7, R4
   110
   111found:
   112	SUBV	R6, R4
   113	RET
   114
   115notfound:
   116	MOVV	$-1, R4
   117	RET
   118
   119lasx:
   120	MOVBU   internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9
   121	BEQ     R9, lsx
   122	XVMOVQ	R7, X0.B32
   123
   124	MOVV	$128, R9
   125	BLT	R5, R9, lasx32_loop
   126lasx128_loop:
   127	XVMOVQ	0(R4), X1
   128	XVMOVQ	32(R4), X2
   129	XVMOVQ	64(R4), X3
   130	XVMOVQ	96(R4), X4
   131
   132	XVSEQB	X1, X0, X1
   133	XVSETNEV	X1, FCC0
   134	BFPT	lasx_found_add_0
   135
   136	XVSEQB	X2, X0, X1
   137	XVSETNEV	X1, FCC0
   138	BFPT	lasx_found_add_32
   139
   140	XVSEQB	X3, X0, X1
   141	XVSETNEV	X1, FCC0
   142	BFPT	lasx_found_add_64
   143
   144	XVSEQB	X4, X0, X1
   145	XVSETNEV	X1, FCC0
   146	BFPT	lasx_found_add_96
   147
   148	ADDV	$128, R4
   149	ADDV	$-128, R5
   150	BGE	R5, R9, lasx128_loop
   151
   152	BEQ	R5, notfound
   153
   154	MOVV	$32, R9
   155	BLT	R5, R9, tail
   156lasx32_loop:
   157	XVMOVQ	0(R4), X1
   158
   159	XVSEQB	X1, X0, X1
   160	XVSETNEV	X1, FCC0
   161	BFPT	lasx_found_add_0
   162
   163	ADDV	$32, R4
   164	ADDV	$-32, R5
   165	BGE	R5, R9, lasx32_loop
   166
   167	BEQ	R5, notfound
   168
   169	JMP	tail
   170
   171lasx_found_add_0:
   172	MOVV	R0, R11
   173	JMP	lasx_index_cal
   174
   175lasx_found_add_32:
   176	MOVV	$32, R11
   177	JMP	lasx_index_cal
   178
   179lasx_found_add_64:
   180	MOVV	$64, R11
   181	JMP	lasx_index_cal
   182
   183lasx_found_add_96:
   184	MOVV	$96, R11
   185	JMP	lasx_index_cal
   186
   187lasx_index_cal:
   188	MOVV	$64, R9
   189	XVMOVQ	X1.V[0], R10
   190	CTZV	R10, R10
   191	BNE	R10, R9, index_cal
   192	ADDV	$8, R11
   193
   194	XVMOVQ	X1.V[1], R10
   195	CTZV	R10, R10
   196	BNE	R10, R9, index_cal
   197	ADDV	$8, R11
   198
   199	XVMOVQ	X1.V[2], R10
   200	CTZV	R10, R10
   201	BNE	R10, R9, index_cal
   202	ADDV	$8, R11
   203
   204	XVMOVQ	X1.V[3], R10
   205	CTZV	R10, R10
   206	JMP	index_cal
   207
   208lsx:
   209	MOVBU   internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9
   210	BEQ     R9, tail
   211	VMOVQ	R7, V0.B16
   212
   213	MOVV	$64, R9
   214	BLT	R5, R9, lsx16_loop
   215lsx64_loop:
   216	VMOVQ	0(R4), V1
   217	VMOVQ	16(R4), V2
   218	VMOVQ	32(R4), V3
   219	VMOVQ	48(R4), V4
   220
   221	VSEQB	V1, V0, V1
   222	VSETNEV	V1, FCC0
   223	BFPT	lsx_found_add_0
   224
   225	VSEQB	V2, V0, V1
   226	VSETNEV	V1, FCC0
   227	BFPT	lsx_found_add_16
   228
   229	VSEQB	V3, V0, V1
   230	VSETNEV	V1, FCC0
   231	BFPT	lsx_found_add_32
   232
   233	VSEQB	V4, V0, V1
   234	VSETNEV	V1, FCC0
   235	BFPT	lsx_found_add_48
   236
   237	ADDV	$64, R4
   238	ADDV	$-64, R5
   239	BGE	R5, R9, lsx64_loop
   240
   241	BEQ	R5, notfound
   242
   243	MOVV	$16, R9
   244	BLT	R5, R9, tail
   245lsx16_loop:
   246	VMOVQ	0(R4), V1
   247
   248	VSEQB	V1, V0, V1
   249	VSETNEV	V1, FCC0
   250	BFPT	lsx_found_add_0
   251
   252	ADDV	$16, R4
   253	ADDV	$-16, R5
   254	BGE	R5, R9, lsx16_loop
   255
   256	BEQ	R5, notfound
   257
   258	JMP	tail
   259
   260lsx_found_add_0:
   261	MOVV	R0, R11
   262	JMP	lsx_index_cal
   263
   264lsx_found_add_16:
   265	MOVV	$16, R11
   266	JMP	lsx_index_cal
   267
   268lsx_found_add_32:
   269	MOVV	$32, R11
   270	JMP	lsx_index_cal
   271
   272lsx_found_add_48:
   273	MOVV	$48, R11
   274	JMP	lsx_index_cal
   275
   276lsx_index_cal:
   277	MOVV	$64, R9
   278
   279	VMOVQ	V1.V[0], R10
   280	CTZV	R10, R10
   281	BNE	R10, R9, index_cal
   282	ADDV	$8, R11
   283
   284	VMOVQ	V1.V[1], R10
   285	CTZV	R10, R10
   286	JMP	index_cal
   287
   288index_cal:
   289	SRLV	$3, R10
   290	ADDV	R11, R10
   291	ADDV	R10, R4
   292	JMP	found

View as plain text