...
Run Format

Text file src/internal/bytealg/compare_loong64.s

Documentation: internal/bytealg

     1// Copyright 2022 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
     9	// R4 = a_base
    10	// R5 = a_len
    11	// R6 = a_cap (unused)
    12	// R7 = b_base (want in R6)
    13	// R8 = b_len (want in R7)
    14	// R9 = b_cap (unused)
    15	MOVV	R7, R6
    16	MOVV	R8, R7
    17	JMP	cmpbody<>(SB)
    18
    19TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
    20	// R4 = a_base
    21	// R5 = a_len
    22	// R6 = b_base
    23	// R7 = b_len
    24	JMP	cmpbody<>(SB)
    25
    26// input:
    27//    R4: points to the start of a
    28//    R5: length of a
    29//    R6: points to the start of b
    30//    R7: length of b
    31// for regabi the return value (-1/0/1) in R4
    32TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
    33	BEQ	R4, R6, cmp_len	// same start of a and b, then compare lengths
    34
    35	SGTU	R5, R7, R9
    36	BNE	R9, b_lt_a
    37	MOVV	R5, R14
    38	JMP	entry
    39
    40b_lt_a:
    41	MOVV	R7, R14
    42
    43entry:
    44	BEQ	R14, cmp_len	// minlength is 0
    45
    46	MOVV	$32, R15
    47	BGE	R14, R15, lasx
    48tail:
    49	MOVV	$8, R15
    50	BLT	R14, R15, lt_8
    51generic8_loop:
    52	MOVV	(R4), R10
    53	MOVV	(R6), R11
    54	BEQ	R10, R11, generic8_equal
    55
    56cmp8:
    57	AND	$0xff, R10, R16
    58	AND	$0xff, R11, R17
    59	BNE	R16, R17, cmp_byte
    60
    61	BSTRPICKV	$15, R10, $8, R16
    62	BSTRPICKV	$15, R11, $8, R17
    63	BNE	R16, R17, cmp_byte
    64
    65	BSTRPICKV	$23, R10, $16, R16
    66	BSTRPICKV	$23, R11, $16, R17
    67	BNE	R16, R17, cmp_byte
    68
    69	BSTRPICKV	$31, R10, $24, R16
    70	BSTRPICKV	$31, R11, $24, R17
    71	BNE	R16, R17, cmp_byte
    72
    73	BSTRPICKV	$39, R10, $32, R16
    74	BSTRPICKV	$39, R11, $32, R17
    75	BNE	R16, R17, cmp_byte
    76
    77	BSTRPICKV	$47, R10, $40, R16
    78	BSTRPICKV	$47, R11, $40, R17
    79	BNE	R16, R17, cmp_byte
    80
    81	BSTRPICKV	$55, R10, $48, R16
    82	BSTRPICKV	$55, R11, $48, R17
    83	BNE	R16, R17, cmp_byte
    84
    85	BSTRPICKV	$63, R10, $56, R16
    86	BSTRPICKV	$63, R11, $56, R17
    87	BNE	R16, R17, cmp_byte
    88
    89generic8_equal:
    90	ADDV	$-8, R14
    91	BEQ	R14, cmp_len
    92	ADDV	$8, R4
    93	ADDV	$8, R6
    94	BGE	R14, R15, generic8_loop
    95
    96lt_8:
    97	MOVV	$4, R15
    98	BLT	R14, R15, lt_4
    99
   100	MOVWU	(R4), R10
   101	MOVWU	(R6), R11
   102	BEQ	R10, R11, lt_8_equal
   103
   104	AND	$0xff, R10, R16
   105	AND	$0xff, R11, R17
   106	BNE	R16, R17, cmp_byte
   107
   108	BSTRPICKV	$15, R10, $8, R16
   109	BSTRPICKV	$15, R11, $8, R17
   110	BNE	R16, R17, cmp_byte
   111
   112	BSTRPICKV	$23, R10, $16, R16
   113	BSTRPICKV	$23, R11, $16, R17
   114	BNE	R16, R17, cmp_byte
   115
   116	BSTRPICKV	$31, R10, $24, R16
   117	BSTRPICKV	$31, R11, $24, R17
   118	BNE	R16, R17, cmp_byte
   119
   120lt_8_equal:
   121	ADDV	$-4, R14
   122	BEQ	R14, cmp_len
   123	ADDV	$4, R4
   124	ADDV	$4, R6
   125
   126lt_4:
   127	MOVV	$2, R15
   128	BLT	R14, R15, lt_2
   129
   130	MOVHU	(R4), R10
   131	MOVHU	(R6), R11
   132	BEQ	R10, R11, lt_4_equal
   133
   134	AND	$0xff, R10, R16
   135	AND	$0xff, R11, R17
   136	BNE	R16, R17, cmp_byte
   137
   138	BSTRPICKV	$15, R10, $8, R16
   139	BSTRPICKV	$15, R11, $8, R17
   140	BNE	R16, R17, cmp_byte
   141
   142lt_4_equal:
   143	ADDV	$-2, R14
   144	BEQ	R14, cmp_len
   145	ADDV	$2, R4
   146	ADDV	$2, R6
   147
   148lt_2:
   149	MOVBU	(R4), R16
   150	MOVBU	(R6), R17
   151	BNE	R16, R17, cmp_byte
   152	JMP	cmp_len
   153
   154	// Compare 1 byte taken from R16/R17 that are known to differ.
   155cmp_byte:
   156	SGTU	R16, R17, R4	// R4 = 1 if (R16 > R17)
   157	BNE	R0, R4, ret
   158	MOVV	$-1, R4
   159	RET
   160
   161cmp_len:
   162	SGTU	R5, R7, R8
   163	SGTU	R7, R5, R9
   164	SUBV	R9, R8, R4
   165
   166ret:
   167	RET
   168
   169lasx:
   170	MOVV	$64, R20
   171	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9
   172	BEQ	R9, lsx
   173
   174	MOVV	$128, R15
   175	BLT	R14, R15, lasx32_loop
   176lasx128_loop:
   177	XVMOVQ	(R4), X0
   178	XVMOVQ	(R6), X1
   179	XVSEQB	X0, X1, X0
   180	XVSETANYEQB	X0, FCC0
   181	BFPT	lasx_found_0
   182
   183	XVMOVQ	32(R4), X0
   184	XVMOVQ	32(R6), X1
   185	XVSEQB	X0, X1, X0
   186	XVSETANYEQB	X0, FCC0
   187	BFPT	lasx_found_32
   188
   189	XVMOVQ	64(R4), X0
   190	XVMOVQ	64(R6), X1
   191	XVSEQB	X0, X1, X0
   192	XVSETANYEQB	X0, FCC0
   193	BFPT	lasx_found_64
   194
   195	XVMOVQ	96(R4), X0
   196	XVMOVQ	96(R6), X1
   197	XVSEQB	X0, X1, X0
   198	XVSETANYEQB	X0, FCC0
   199	BFPT	lasx_found_96
   200
   201	ADDV	$-128, R14
   202	BEQ	R14, cmp_len
   203	ADDV	$128, R4
   204	ADDV	$128, R6
   205	BGE	R14, R15, lasx128_loop
   206
   207	MOVV	$32, R15
   208	BLT	R14, R15, tail
   209lasx32_loop:
   210	XVMOVQ	(R4), X0
   211	XVMOVQ	(R6), X1
   212	XVSEQB	X0, X1, X0
   213	XVSETANYEQB	X0, FCC0
   214	BFPT	lasx_found_0
   215
   216	ADDV	$-32, R14
   217	BEQ	R14, cmp_len
   218	ADDV	$32, R4
   219	ADDV	$32, R6
   220	BGE	R14, R15, lasx32_loop
   221	JMP	tail
   222
   223lasx_found_0:
   224	MOVV	R0, R11
   225	JMP	lasx_find_byte
   226
   227lasx_found_32:
   228	MOVV	$32, R11
   229	JMP	lasx_find_byte
   230
   231lasx_found_64:
   232	MOVV	$64, R11
   233	JMP	lasx_find_byte
   234
   235lasx_found_96:
   236	MOVV	$96, R11
   237
   238lasx_find_byte:
   239	XVMOVQ	X0.V[0], R10
   240	CTOV	R10, R10
   241	BNE	R10, R20, find_byte
   242	ADDV	$8, R11
   243
   244	XVMOVQ	X0.V[1], R10
   245	CTOV	R10, R10
   246	BNE	R10, R20, find_byte
   247	ADDV	$8, R11
   248
   249	XVMOVQ	X0.V[2], R10
   250	CTOV	R10, R10
   251	BNE	R10, R20, find_byte
   252	ADDV	$8, R11
   253
   254	XVMOVQ	X0.V[3], R10
   255	CTOV	R10, R10
   256	JMP	find_byte
   257
   258lsx:
   259	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9
   260	BEQ	R9, generic32_loop
   261
   262	MOVV	$64, R15
   263	BLT	R14, R15, lsx16_loop
   264lsx64_loop:
   265	VMOVQ	(R4), V0
   266	VMOVQ	(R6), V1
   267	VSEQB	V0, V1, V0
   268	VSETANYEQB	V0, FCC0
   269	BFPT	lsx_found_0
   270
   271	VMOVQ	16(R4), V0
   272	VMOVQ	16(R6), V1
   273	VSEQB	V0, V1, V0
   274	VSETANYEQB	V0, FCC0
   275	BFPT	lsx_found_16
   276
   277	VMOVQ	32(R4), V0
   278	VMOVQ	32(R6), V1
   279	VSEQB	V0, V1, V0
   280	VSETANYEQB	V0, FCC0
   281	BFPT	lsx_found_32
   282
   283	VMOVQ	48(R4), V0
   284	VMOVQ	48(R6), V1
   285	VSEQB	V0, V1, V0
   286	VSETANYEQB	V0, FCC0
   287	BFPT	lsx_found_48
   288
   289	ADDV	$-64, R14
   290	BEQ	R14, cmp_len
   291	ADDV	$64, R4
   292	ADDV	$64, R6
   293	BGE	R14, R15, lsx64_loop
   294
   295	MOVV	$16, R15
   296	BLT	R14, R15, tail
   297lsx16_loop:
   298	VMOVQ	(R4), V0
   299	VMOVQ	(R6), V1
   300	VSEQB	V0, V1, V0
   301	VSETANYEQB	V0, FCC0
   302	BFPT	lsx_found_0
   303
   304	ADDV	$-16, R14
   305	BEQ	R14, cmp_len
   306	ADDV	$16, R4
   307	ADDV	$16, R6
   308	BGE	R14, R15, lsx16_loop
   309	JMP	tail
   310
   311lsx_found_0:
   312	MOVV	R0, R11
   313	JMP	lsx_find_byte
   314
   315lsx_found_16:
   316	MOVV	$16, R11
   317	JMP	lsx_find_byte
   318
   319lsx_found_32:
   320	MOVV	$32, R11
   321	JMP	lsx_find_byte
   322
   323lsx_found_48:
   324	MOVV	$48, R11
   325
   326lsx_find_byte:
   327	VMOVQ	V0.V[0], R10
   328	CTOV	R10, R10
   329	BNE	R10, R20, find_byte
   330	ADDV	$8, R11
   331
   332	VMOVQ	V0.V[1], R10
   333	CTOV	R10, R10
   334
   335find_byte:
   336	SRLV	$3, R10
   337	ADDV	R10, R11
   338	ADDV	R11, R4
   339	ADDV	R11, R6
   340	MOVB	(R4), R16
   341	MOVB	(R6), R17
   342	JMP	cmp_byte
   343
   344generic32_loop:
   345	MOVV	(R4), R10
   346	MOVV	(R6), R11
   347	BNE	R10, R11, cmp8
   348	MOVV	8(R4), R10
   349	MOVV	8(R6), R11
   350	BNE	R10, R11, cmp8
   351	MOVV	16(R4), R10
   352	MOVV	16(R6), R11
   353	BNE	R10, R11, cmp8
   354	MOVV	24(R4), R10
   355	MOVV	24(R6), R11
   356	BNE	R10, R11, cmp8
   357	ADDV	$-32, R14
   358	BEQ	R14, cmp_len
   359	ADDV	$32, R4
   360	ADDV	$32, R6
   361	MOVV	$32, R15
   362	BGE	R14, R15, generic32_loop
   363	JMP	tail

View as plain text