...
Run Format

Text file src/internal/bytealg/index_loong64.s

Documentation: internal/bytealg

     1// Copyright 2025 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8TEXT ·Index<ABIInternal>(SB),NOSPLIT,$0-56
      	// Byte-slice entry point. It only repacks the separator into the
      	// registers indexbody expects (R6 = pointer, R7 = length) and then
      	// tail-jumps, so indexbody's RET returns directly to our caller with
      	// the result in R4.
      	// NOTE(review): presumably R6 held the first slice's capacity on
      	// entry, which the search does not need - confirm against the
      	// ABIInternal register assignment.
      	// The order of the two moves matters: R7 must be copied out before
      	// it is overwritten with R8.
     9	MOVV	R7, R6		// R6 = separator pointer (was in R7)
    10	MOVV	R8, R7		// R7 = separator length (was in R8)
    11	JMP	indexbody<>(SB)
    12
    13TEXT ·IndexString<ABIInternal>(SB),NOSPLIT,$0-40
      	// String entry point. No register shuffling is done here: control
      	// goes straight to indexbody, whose RET returns to our caller with
      	// the result in R4.
      	// NOTE(review): presumably the incoming argument registers already
      	// match indexbody's R4..R7 contract for two strings - confirm against
      	// the ABIInternal register assignment.
    14	JMP	indexbody<>(SB)
    15
    16// input:
    17//   R4 = string pointer (start of the text to search)
    18//   R5 = string length in bytes
    19//   R6 = separator pointer
    20//   R7 = separator length (2 <= len <= 64)
      // output:
      //   R4 = offset of the first match relative to the incoming R4,
      //        or -1 if there is no match
      // overwritten (depending on the path taken):
      //   R5, R7-R17, V0-V7, X0-X5, FCC0-FCC3
    21TEXT indexbody<>(SB),NOSPLIT,$0
    22	// The main idea is to load 'sep' into registers once, up front,
    23	// so that it does not have to be reloaded from memory
    24	// for subsequent substring comparisons.
      	//
      	// Every search loop below has the same shape:
      	//   1. give up (not_found) once R4 has moved past R8, the last
      	//      position at which a full-length match could still start;
      	//   2. load the leading bytes at R4, then advance R4 by one;
      	//   3. on mismatch go back to step 1, otherwise compare the remaining
      	//      pieces of the separator.
      	// Because R4 has already been advanced in step 2, the remaining
      	// pieces are addressed with "offset - 1" displacements. When the
      	// separator length is not a multiple of the load width, the final
      	// piece is taken from the tail of the separator and may overlap the
      	// pieces before it.
    25	SUBV	R7, R5, R8	// R8 = string length - separator length
    26	ADDV	R4, R8		// R8 = address of the last possible match start
    27	ADDV	$1, R4, R9	// R9 = base + 1; 'found' subtracts it from the advanced R4
    28
      	// Dispatch on the separator length. R5 (string length) has been
      	// consumed above and is reused as a scratch register from here on.
    29	MOVV	$8, R5
    30	BGE	R7, R5, len_gt_or_eq_8
    31len_2_7:
    32	AND	$0x4, R7, R5	// bit 2 set => length is 4..7
    33	BNE	R5, len_4_7
    34
    35len_2_3:
    36	AND	$0x1, R7, R5	// bit 0 set => length is 3
    37	BNE	R5, len_3
    38
    39len_2:
    40	MOVHU	(R6), R10	// R10 = sep[0:2]
    41loop_2:
    42	BLT	R8, R4, not_found
    43	MOVHU	(R4), R11
    44	ADDV	$1, R4
    45	BNE	R10, R11, loop_2
    46	JMP	found
    47
    48len_3:
    49	MOVHU	(R6), R10	// R10 = sep[0:2]
    50	MOVBU	2(R6), R11	// R11 = sep[2]
    51loop_3:
    52	BLT	R8, R4, not_found
    53	MOVHU	(R4), R12
    54	ADDV	$1, R4
    55	BNE	R10, R12, loop_3
    56	MOVBU	1(R4), R13	// candidate byte 2 (R4 is already one past the start)
    57	BNE	R11, R13, loop_3
    58	JMP	found
    59
    60len_4_7:
    61	AND	$0x2, R7, R5	// bit 1 set => length is 6 or 7
    62	BNE	R5, len_6_7
    63	AND	$0x1, R7, R5	// bit 0 set => length is 5
    64	BNE	R5, len_5
    65len_4:
    66	MOVWU	(R6), R10	// R10 = sep[0:4]
    67loop_4:
    68	BLT	R8, R4, not_found
    69	MOVWU	(R4), R11
    70	ADDV	$1, R4
    71	BNE	R10, R11, loop_4
    72	JMP	found
    73
    74len_5:
    75	MOVWU	(R6), R10	// R10 = sep[0:4]
    76	MOVBU	4(R6), R11	// R11 = sep[4]
    77loop_5:
    78	BLT	R8, R4, not_found
    79	MOVWU	(R4), R12
    80	ADDV	$1, R4
    81	BNE	R10, R12, loop_5
    82	MOVBU	3(R4), R13	// candidate byte 4
    83	BNE	R11, R13, loop_5
    84	JMP	found
    85
    86len_6_7:
    87	AND	$0x1, R7, R5	// bit 0 set => length is 7
    88	BNE	R5, len_7
    89len_6:
    90	MOVWU	(R6), R10	// R10 = sep[0:4]
    91	MOVHU	4(R6), R11	// R11 = sep[4:6]
    92loop_6:
    93	BLT	R8, R4, not_found
    94	MOVWU	(R4), R12
    95	ADDV	$1, R4
    96	BNE	R10, R12, loop_6
    97	MOVHU	3(R4), R13	// candidate bytes 4..5
    98	BNE	R11, R13, loop_6
    99	JMP	found
   100
   101len_7:
   102	MOVWU	(R6), R10	// R10 = sep[0:4]
   103	MOVWU	3(R6), R11	// R11 = sep[3:7], overlapping byte 3 of R10
   104loop_7:
   105	BLT	R8, R4, not_found
   106	MOVWU	(R4), R12
   107	ADDV	$1, R4
   108	BNE	R10, R12, loop_7
   109	MOVWU	2(R4), R13	// candidate bytes 3..6
   110	BNE	R11, R13, loop_7
   111	JMP	found
   112
   113len_gt_or_eq_8:
   114	BEQ	R5, R7, len_8	// R5 is still 8 here
   115	MOVV	$17, R5
   116	BGE	R7, R5, len_gt_or_eq_17
   117	JMP	len_9_16
   118len_8:
   119	MOVV	(R6), R10	// R10 = the whole 8-byte separator
   120loop_8:
   121	BLT	R8, R4, not_found
   122	MOVV	(R4), R11
   123	ADDV	$1, R4
   124	BNE	R10, R11, loop_8
   125	JMP	found
   126
   127len_9_16:
   128	MOVV	(R6), R10	// R10 = first 8 bytes of sep
   129	SUBV	$8, R7		// R7 = len - 8
   130	MOVV	(R6)(R7), R11	// R11 = last 8 bytes of sep
   131	SUBV	$1, R7		// R7 = len - 9, tail offset relative to the advanced R4
   132loop_9_16:
   133	BLT	R8, R4, not_found
   134	MOVV	(R4), R12
   135	ADDV	$1, R4
   136	BNE	R10, R12, loop_9_16
   137	MOVV	(R4)(R7), R13	// last 8 bytes of the candidate
   138	BNE	R11, R13, loop_9_16
   139	JMP	found
   140
   141len_gt_or_eq_17:
   142	MOVV	$25, R5
   143	BGE	R7, R5, len_gt_or_eq_25
   144len_17_24:
   145	MOVV	0(R6), R10	// R10 = sep[0:8]
   146	MOVV	8(R6), R11	// R11 = sep[8:16]
   147	SUBV	$8, R7		// R7 = len - 8
   148	MOVV	(R6)(R7), R12	// R12 = last 8 bytes of sep
   149	SUBV	$1, R7		// R7 = len - 9, tail offset relative to the advanced R4
   150loop_17_24:
   151	BLT	R8, R4, not_found
   152	MOVV	(R4), R13
   153	ADDV	$1, R4
   154	BNE	R10, R13, loop_17_24
   155	MOVV	7(R4), R14	// candidate bytes 8..15
   156	BNE	R11, R14, loop_17_24
   157	MOVV	(R4)(R7), R15	// last 8 bytes of the candidate
   158	BNE	R12, R15, loop_17_24
   159	JMP	found
   160
   161len_gt_or_eq_25:
   162	MOVV	$33, R5
   163	BGE	R7, R5, len_gt_or_eq_33
      	// Length is 25..32: take the vector path when the LSX feature byte
      	// is non-zero, otherwise fall through to the scalar version.
   164	MOVBU   internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R10
   165	BNE	R10, lsx_len_25_32
   166len_25_32:
   167	MOVV	0(R6), R10	// R10 = sep[0:8]
   168	MOVV	8(R6), R11	// R11 = sep[8:16]
   169	MOVV	16(R6), R12	// R12 = sep[16:24]
   170	SUBV	$8, R7		// R7 = len - 8
   171	MOVV	(R6)(R7), R13	// R13 = last 8 bytes of sep
   172	SUBV	$1, R7		// R7 = len - 9, tail offset relative to the advanced R4
   173loop_25_32:
   174	BLT	R8, R4, not_found
   175	MOVV	(R4), R14
   176	ADDV	$1, R4
   177	BNE	R10, R14, loop_25_32
   178	MOVV	7(R4), R15	// candidate bytes 8..15
   179	BNE	R11, R15, loop_25_32
   180	MOVV	15(R4), R16	// candidate bytes 16..23
   181	BNE	R12, R16, loop_25_32
   182	MOVV	(R4)(R7), R17	// last 8 bytes of the candidate
   183	BNE	R13, R17, loop_25_32
   184	JMP	found
   185
   186	// On loong64, LSX is included if LASX is supported.
      	// lasx_len_25_32 is only an alias of lsx_len_25_32; no branch in
      	// this function targets it.
      	// NOTE(review): the vector loops below treat "flag set" as a
      	// mismatch. That relies on VSEQV/XVSEQV leaving a zero 64-bit lane
      	// wherever the inputs differ and on VSETANYEQV/XVSETANYEQV setting
      	// the flag when any lane is zero - confirm against the LSX/LASX
      	// instruction reference.
   187lasx_len_25_32:
   188lsx_len_25_32:
   189	VMOVQ	0(R6), V0	// V0 = first 16 bytes of sep
   190	SUBV	$16, R7		// R7 = len - 16
   191	VMOVQ	(R6)(R7), V1	// V1 = last 16 bytes of sep (overlaps V0 when len < 32)
   192	SUBV	$1, R7		// R7 = len - 17, tail offset relative to the advanced R4
   193lsx_loop_25_32:
   194	BLT	R8, R4, not_found
   195	VMOVQ	(R4), V2
   196	ADDV	$1, R4
   197	VSEQV	V0, V2, V2
   198	VSETANYEQV	V2, FCC0
   199	BFPT	FCC0, lsx_loop_25_32	// head differs: try the next position
   200
   201	VMOVQ	(R4)(R7), V3	// last 16 bytes of the candidate
   202	VSEQV	V1, V3, V3
   203	VSETANYEQV	V3, FCC1
   204	BFPT	FCC1, lsx_loop_25_32	// tail differs: try the next position
   205	JMP	found
   206
   207len_gt_or_eq_33:
      	// Length is 33..64: only vector implementations exist. R10 selects
      	// between them: LASX when its feature byte is non-zero, LSX otherwise.
      	// NOTE(review): HasLSX is not checked on this path; presumably the
      	// Go-side caller never passes a separator longer than 32 bytes when
      	// no vector extension is available - confirm against the caller.
   208	MOVBU   internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R10
   209	MOVV	$49, R5
   210	BGE	R7, R5, len_gt_or_eq_49
   211len_33_48:
   212	BNE	R10, lasx_len_33_48
   213	JMP	lsx_len_33_48
   214
   215len_gt_or_eq_49:
   216len_49_64:
   217	BNE	R10, lasx_len_49_64
   218	JMP	lsx_len_49_64
   219
   220lsx_len_33_48:
   221	VMOVQ	0(R6), V0	// V0 = sep[0:16]
   222	VMOVQ	16(R6), V1	// V1 = sep[16:32]
   223	SUBV	$16, R7		// R7 = len - 16
   224	VMOVQ	(R6)(R7), V2	// V2 = last 16 bytes of sep
   225	SUBV	$1, R7		// R7 = len - 17, tail offset relative to the advanced R4
   226lsx_loop_33_48:
   227	BLT	R8, R4, not_found
   228	VMOVQ	0(R4), V3
   229	ADDV	$1, R4
   230	VSEQV	V0, V3, V3
   231	VSETANYEQV	V3, FCC0
   232	BFPT	FCC0, lsx_loop_33_48
   233
   234	VMOVQ	15(R4), V4	// candidate bytes 16..31
   235	VSEQV	V1, V4, V4
   236	VSETANYEQV	V4, FCC1
   237	BFPT	FCC1, lsx_loop_33_48
   238
   239	VMOVQ	(R4)(R7), V5	// last 16 bytes of the candidate
   240	VSEQV	V2, V5, V5
   241	VSETANYEQV	V5, FCC2
   242	BFPT	FCC2, lsx_loop_33_48
   243	JMP	found
   244
   245lsx_len_49_64:
   246	VMOVQ	0(R6), V0	// V0 = sep[0:16]
   247	VMOVQ	16(R6), V1	// V1 = sep[16:32]
   248	VMOVQ	32(R6), V2	// V2 = sep[32:48]
   249	SUBV	$16, R7		// R7 = len - 16
   250	VMOVQ	(R6)(R7), V3	// V3 = last 16 bytes of sep
   251	SUBV	$1, R7		// R7 = len - 17, tail offset relative to the advanced R4
   252lsx_loop_49_64:
   253	BLT	R8, R4, not_found
   254	VMOVQ	0(R4), V4
   255	ADDV	$1, R4
   256	VSEQV	V0, V4, V4
   257	VSETANYEQV	V4, FCC0
   258	BFPT	FCC0, lsx_loop_49_64
   259
   260	VMOVQ	15(R4), V5	// candidate bytes 16..31
   261	VSEQV	V1, V5, V5
   262	VSETANYEQV	V5, FCC1
   263	BFPT	FCC1, lsx_loop_49_64
   264
   265	VMOVQ	31(R4), V6	// candidate bytes 32..47
   266	VSEQV	V2, V6, V6
   267	VSETANYEQV	V6, FCC2
   268	BFPT	FCC2, lsx_loop_49_64
   269
   270	VMOVQ	(R4)(R7), V7	// last 16 bytes of the candidate
   271	VSEQV	V3, V7, V7
   272	VSETANYEQV	V7, FCC3
   273	BFPT	FCC3, lsx_loop_49_64
   274	JMP	found
   275
      	// With LASX one implementation serves every length from 33 to 64:
      	// the first 32 bytes plus the last 32 bytes always cover the whole
      	// separator, overlapping in the middle when len < 64.
   276lasx_len_33_48:
   277lasx_len_49_64:
   278lasx_len_33_64:
   279	XVMOVQ	(R6), X0	// X0 = first 32 bytes of sep
   280	SUBV	$32, R7		// R7 = len - 32
   281	XVMOVQ	(R6)(R7), X1	// X1 = last 32 bytes of sep
   282	SUBV	$1, R7		// R7 = len - 33, tail offset relative to the advanced R4
   283lasx_loop_33_64:
   284	BLT	R8, R4, not_found
   285	XVMOVQ	(R4), X2
   286	ADDV	$1, R4
   287	XVSEQV	X0, X2, X3
   288	XVSETANYEQV	X3, FCC0
   289	BFPT	FCC0, lasx_loop_33_64
   290
   291	XVMOVQ	(R4)(R7), X4	// last 32 bytes of the candidate
   292	XVSEQV	X1, X4, X5
   293	XVSETANYEQV	X5, FCC1
   294	BFPT	FCC1, lasx_loop_33_64
   295	JMP	found
   296
      // Common exits. On a match R4 is one past the match start and
      // R9 = base + 1, so R4 - R9 is the match offset from the base.
   297found:
   298	SUBV	R9, R4
   299	RET
   300
   301not_found:
   302	MOVV	$-1, R4		// no match: result is -1
   303	RET

View as plain text