...
Run Format

Text file src/internal/bytealg/compare_riscv64.s

Documentation: internal/bytealg

     1// Copyright 2022 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// Register-argument entry point for two (base, len, cap) triples.
	// It repacks the arguments into the four-register layout that compare<>
	// expects (X10/X11 = a base/len, X12/X13 = b base/len) and tail-jumps
	// there, so compare<> returns directly to our caller with the result
	// in X10.
     9	// X10 = a_base
    10	// X11 = a_len
    11	// X12 = a_cap (unused)
    12	// X13 = b_base (want in X12)
    13	// X14 = b_len (want in X13)
    14	// X15 = b_cap (unused)
	// Order matters: X13 must be copied into X12 before X13 itself is
	// overwritten with b_len. Clobbering X12 is fine because a_cap is unused.
    15	MOV	X13, X12
    16	MOV	X14, X13
    17	JMP	compare<>(SB)
    18
    19TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// Register-argument entry point for two (base, len) pairs. The arguments
	// already sit in the registers compare<> reads on entry, so no shuffling
	// is needed: tail-jump and let compare<> return to our caller with the
	// result in X10.
    20	// X10 = a_base
    21	// X11 = a_len
    22	// X12 = b_base
    23	// X13 = b_len
    24	JMP	compare<>(SB)
    25
    26// On entry:
    27// X10 points to start of a
    28// X11 length of a
    29// X12 points to start of b
    30// X13 length of b
    31// return value in X10 (-1/0/1)
    32TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Three-way lexicographic comparison of the byte ranges a and b.
	// The first min(a_len, b_len) bytes are compared as unsigned bytes; if
	// they are all equal, the lengths decide the result.
	//
	// Working registers:
	//   X5      = bytes of the common prefix still to be compared
	//   X6      = current chunk size (32, 16, 8 or 4)
	//   X7      = bytes needed to reach 8-byte alignment
	//   X8, X9  = the pair of values handed to the final comparison at cmp
	//   X15-X30 = scratch for loaded words/bytes
	// NOTE(review): the scratch sequence skips X26/X27 - presumably they are
	// reserved by the Go toolchain; confirm against the riscv64 ABI.
    33	BEQ	X10, X12, cmp_len	// same base pointer: only the lengths can differ
    34
    35	MIN	X11, X13, X5	// X5 = min(a_len, b_len)
    36	BEQZ	X5, cmp_len	// one side is empty: only the lengths matter
    37
    38	MOV	$32, X6
    39	BLT	X5, X6, check8_unaligned	// fewer than 32 bytes: use byte loads only
    40
    41	// Check alignment - if alignment differs we have to do one byte at a time.
    42	AND	$7, X10, X7
    43	AND	$7, X12, X8
    44	BNE	X7, X8, check8_unaligned
    45	BEQZ	X7, compare32	// both already 8-byte aligned and X5 >= 32
    46
    47	// Check one byte at a time until we reach 8 byte alignment.
	// X0 always reads as zero, so the SUB negates X7; adding 8 then gives
	// X7 = 8 - (addr & 7), the byte count to the next 8-byte boundary (1..7).
	// That count is taken off X5 up front because the loop below counts X7
	// down rather than X5.
    48	SUB	X7, X0, X7
    49	ADD	$8, X7, X7
    50	SUB	X7, X5, X5
    51align:
    52	SUB	$1, X7
    53	MOVBU	0(X10), X8
    54	MOVBU	0(X12), X9
    55	BNE	X8, X9, cmp	// mismatching bytes are already in X8/X9
    56	ADD	$1, X10
    57	ADD	$1, X12
    58	BNEZ	X7, align
    59
    60check32:
    61	// X6 contains $32
	// X5 was >= 32 before alignment and at most 7 bytes were consumed, so at
	// least 25 remain: if a 32-byte chunk no longer fits, a 16-byte one does.
    62	BLT	X5, X6, compare16
    63compare32:
	// Aligned path: compare 32 bytes per iteration as four 8-byte words.
	// On a mismatch, cmp8a handles the X15/X16 pair and cmp8b the X17/X18 pair.
    64	MOV	0(X10), X15
    65	MOV	0(X12), X16
    66	MOV	8(X10), X17
    67	MOV	8(X12), X18
    68	BNE	X15, X16, cmp8a
    69	BNE	X17, X18, cmp8b
    70	MOV	16(X10), X15
    71	MOV	16(X12), X16
    72	MOV	24(X10), X17
    73	MOV	24(X12), X18
    74	BNE	X15, X16, cmp8a
    75	BNE	X17, X18, cmp8b
    76	ADD	$32, X10
    77	ADD	$32, X12
    78	SUB	$32, X5
    79	BGE	X5, X6, compare32	// X6 still holds 32
    80	BEQZ	X5, cmp_len	// common prefix fully equal
    81
    82check16:
	// Reached by fall-through with 0 < X5 < 32.
    83	MOV	$16, X6
    84	BLT	X5, X6, check8_unaligned
    85compare16:
	// Aligned path: one 16-byte chunk as two 8-byte words. Not a loop: at
	// most one such chunk can remain once fewer than 32 bytes are left.
    86	MOV	0(X10), X15
    87	MOV	0(X12), X16
    88	MOV	8(X10), X17
    89	MOV	8(X12), X18
    90	BNE	X15, X16, cmp8a
    91	BNE	X17, X18, cmp8b
    92	ADD	$16, X10
    93	ADD	$16, X12
    94	SUB	$16, X5
    95	BEQZ	X5, cmp_len
    96
    97check8_unaligned:
	// Byte-load path. It is used for short inputs, for pointers whose
	// alignment differs, and for the tail left over by the aligned path.
    98	MOV	$8, X6
    99	BLT	X5, X6, check4_unaligned
   100compare8_unaligned:
	// Load eight bytes from each side, then compare them in address order
	// so the first differing byte decides the result. Each cmp1x target
	// below is tied to one specific register pair.
   101	MOVBU	0(X10), X8
   102	MOVBU	1(X10), X15
   103	MOVBU	2(X10), X17
   104	MOVBU	3(X10), X19
   105	MOVBU	4(X10), X21
   106	MOVBU	5(X10), X23
   107	MOVBU	6(X10), X25
   108	MOVBU	7(X10), X29
   109	MOVBU	0(X12), X9
   110	MOVBU	1(X12), X16
   111	MOVBU	2(X12), X18
   112	MOVBU	3(X12), X20
   113	MOVBU	4(X12), X22
   114	MOVBU	5(X12), X24
   115	MOVBU	6(X12), X28
   116	MOVBU	7(X12), X30
   117	BNE	X8, X9, cmp1a
   118	BNE	X15, X16, cmp1b
   119	BNE	X17, X18, cmp1c
   120	BNE	X19, X20, cmp1d
   121	BNE	X21, X22, cmp1e
   122	BNE	X23, X24, cmp1f
   123	BNE	X25, X28, cmp1g
   124	BNE	X29, X30, cmp1h
   125	ADD	$8, X10
   126	ADD	$8, X12
   127	SUB	$8, X5
   128	BGE	X5, X6, compare8_unaligned
   129	BEQZ	X5, cmp_len
   130
   131check4_unaligned:
   132	MOV	$4, X6
   133	BLT	X5, X6, compare1
   134compare4_unaligned:
	// Same scheme as compare8_unaligned, four bytes at a time, reusing the
	// first four register pairs and their cmp1a..cmp1d targets.
   135	MOVBU	0(X10), X8
   136	MOVBU	1(X10), X15
   137	MOVBU	2(X10), X17
   138	MOVBU	3(X10), X19
   139	MOVBU	0(X12), X9
   140	MOVBU	1(X12), X16
   141	MOVBU	2(X12), X18
   142	MOVBU	3(X12), X20
   143	BNE	X8, X9, cmp1a
   144	BNE	X15, X16, cmp1b
   145	BNE	X17, X18, cmp1c
   146	BNE	X19, X20, cmp1d
   147	ADD	$4, X10
   148	ADD	$4, X12
   149	SUB	$4, X5
   150	BGE	X5, X6, compare4_unaligned
   151
   152compare1:
	// Final tail: one byte per iteration until X5 reaches zero.
   153	BEQZ	X5, cmp_len
   154	MOVBU	0(X10), X8
   155	MOVBU	0(X12), X9
   156	BNE	X8, X9, cmp
   157	ADD	$1, X10
   158	ADD	$1, X12
   159	SUB	$1, X5
   160	JMP	compare1
   161
   162	// Compare 8 bytes of memory in X15/X16 that are known to differ.
   163cmp8a:
	// Copy into X17/X18 so both word pairs share the scan at cmp8b.
   164	MOV	X15, X17
   165	MOV	X16, X18
   166
   167	// Compare 8 bytes of memory in X17/X18 that are known to differ.
   168cmp8b:
	// Slide a one-byte mask up from the least significant byte, which is the
	// lowest address for little-endian loads, until the masked values differ.
	// Both masked values occupy the same bit positions, so the unsigned
	// comparison at cmp orders them by that byte. The loop needs no bound
	// because the two words are known to differ.
   169	MOV	$0xff, X19
   170cmp8_loop:
   171	AND	X17, X19, X8
   172	AND	X18, X19, X9
   173	BNE	X8, X9, cmp
   174	SLLI	$8, X19
   175	JMP	cmp8_loop
   176
	// cmp1a..cmp1h: one target per register pair used by the unaligned loops.
	// Each sets X5 = (a_byte < b_byte) and X6 = (b_byte < a_byte), then joins
	// cmp_ret.
   177cmp1a:
   178	SLTU	X9, X8, X5
   179	SLTU	X8, X9, X6
   180	JMP	cmp_ret
   181cmp1b:
   182	SLTU	X16, X15, X5
   183	SLTU	X15, X16, X6
   184	JMP	cmp_ret
   185cmp1c:
   186	SLTU	X18, X17, X5
   187	SLTU	X17, X18, X6
   188	JMP	cmp_ret
   189cmp1d:
   190	SLTU	X20, X19, X5
   191	SLTU	X19, X20, X6
   192	JMP	cmp_ret
   193cmp1e:
   194	SLTU	X22, X21, X5
   195	SLTU	X21, X22, X6
   196	JMP	cmp_ret
   197cmp1f:
   198	SLTU	X24, X23, X5
   199	SLTU	X23, X24, X6
   200	JMP	cmp_ret
   201cmp1g:
   202	SLTU	X28, X25, X5
   203	SLTU	X25, X28, X6
   204	JMP	cmp_ret
   205cmp1h:
   206	SLTU	X30, X29, X5
   207	SLTU	X29, X30, X6
   208	JMP	cmp_ret
   209
   210cmp_len:
	// The common prefix is equal (or was skipped): the lengths decide.
   211	MOV	X11, X8
   212	MOV	X13, X9
   213cmp:
	// X5 = (X8 < X9), X6 = (X9 < X8), both unsigned.
   214	SLTU	X9, X8, X5
   215	SLTU	X8, X9, X6
   216cmp_ret:
	// X10 = X6 - X5: -1 if a sorts first, +1 if b sorts first, 0 if equal.
   217	SUB	X5, X6, X10
   218	RET

View as plain text