...
Run Format

Text file src/internal/bytealg/equal_loong64.s

Documentation: internal/bytealg

     1// Copyright 2022 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8#define	REGCTXT	R29	// register through which memequal_varlen reads its closure (size at 8(REGCTXT))
     9
    10// memequal(a, b unsafe.Pointer, size uintptr) bool
      //
      // The three arguments already sit in the registers equalbody expects
      // (R4, R5, R6), so this entry point does no work of its own: it jumps
      // straight to equalbody, which leaves the result in R4 (1 = equal,
      // 0 = not equal) and returns.
    11TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
    12	// R4 = a_base
    13	// R5 = b_base
    14	// R6 = size
    15	JMP	equalbody<>(SB)	// tail jump: equalbody's RET returns for this function
    16
    17// memequal_varlen(a, b unsafe.Pointer) bool
      //
      // Same comparison as memequal, except that the size is not passed as an
      // argument: it is loaded from the closure addressed by REGCTXT into R6,
      // after which the registers match equalbody's input layout and control
      // is handed over with a tail jump. The result comes back in R4
      // (1 = equal, 0 = not equal).
    18TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0
    19	// R4 = a_base
    20	// R5 = b_base
    21	MOVV	8(REGCTXT), R6    // compiler stores size at offset 8 in the closure
    22	JMP	equalbody<>(SB)	// tail jump: equalbody's RET returns for this function
    23
    24// input:
    25//   R4 = a_base
    26//   R5 = b_base
    27//   R6 = size
      // output:
      //   R4 = 1 if the size bytes at a_base and at b_base are identical, 0 otherwise
      //
      // Strategy: sizes >= 64 are handed to the widest path available (LASX
      // 256-bit vectors, else LSX 128-bit vectors, else a 64-byte scalar loop).
      // Whatever those loops leave over, and every size < 64, is finished by the
      // scalar tail, which peels 16-, 8-, 4-, 2- and finally 1-byte pieces.
      //
      // Invariant: R6 holds the number of bytes still to compare and is non-zero
      // on entry to every comparison stage; each stage branches to equal as soon
      // as R6 reaches 0, and a chunk is only loaded after R6 has been checked to
      // be at least the chunk size.
      //
      // Registers written: R4-R21 and R23, plus X0-X15 or V0-V15 and FCC0 on the
      // vector paths.
      //
      // NOTE(review): the size checks use the signed BGE/BLT forms although size
      // is a uintptr; presumably sizes never reach 2^63 - confirm.
    28TEXT equalbody<>(SB),NOSPLIT|NOFRAME,$0
    29	// a_base == b_base: both pointers name the same memory, trivially equal
    30	BEQ	R4, R5, equal
    31	// 0 bytes: nothing to compare
    32	BEQ	R6, equal
    33
    34	MOVV	$64, R7
    35	BGE	R6, R7, lasx	// size >= 64: try the wide paths first
    36
    37	// size < 64 bytes (also the landing point for the wide loops' leftovers)
    38tail:
    39	MOVV	$16, R7
    40	BLT	R6, R7, lt_16
    41generic16_loop:
    42	ADDV	$-16, R6	// account for the 16 bytes compared below
    43	MOVV	0(R4), R8
    44	MOVV	8(R4), R9
    45	MOVV	0(R5), R10
    46	MOVV	8(R5), R11
    47	BNE	R8, R10, not_equal
    48	BNE	R9, R11, not_equal
    49	BEQ	R6, equal	// that was the last chunk
    50	ADDV	$16, R4
    51	ADDV	$16, R5
    52	BGE	R6, R7, generic16_loop	// R7 is still 16
    53
    54	// size < 16 bytes
    55lt_16:
    56	MOVV	$8, R7
    57	BLT	R6, R7, lt_8
    58	ADDV	$-8, R6
    59	MOVV	0(R4), R8
    60	MOVV	0(R5), R9
    61	BNE	R8, R9, not_equal
    62	BEQ	R6, equal
    63	ADDV	$8, R4
    64	ADDV	$8, R5
    65
    66	// size < 8 bytes
    67lt_8:
    68	MOVV	$4, R7
    69	BLT	R6, R7, lt_4
    70	ADDV	$-4, R6
    71	MOVW	0(R4), R8	// both sides use the same load, so comparing the registers compares the 4 bytes
    72	MOVW	0(R5), R9
    73	BNE	R8, R9, not_equal
    74	BEQ	R6, equal
    75	ADDV	$4, R4
    76	ADDV	$4, R5
    77
    78	// size < 4 bytes
    79lt_4:
    80	MOVV	$2, R7
    81	BLT	R6, R7, lt_2
    82	ADDV	$-2, R6
    83	MOVH	0(R4), R8
    84	MOVH	0(R5), R9
    85	BNE	R8, R9, not_equal
    86	BEQ	R6, equal
    87	ADDV	$2, R4
    88	ADDV	$2, R5
    89
    90	// size < 2 bytes: R6 is non-zero here, so exactly one byte remains
    91lt_2:
    92	MOVB	0(R4), R8
    93	MOVB	0(R5), R9
    94	BNE	R8, R9, not_equal
    95
      	// Fall through: the final byte matched.
    96equal:
    97	MOVV	$1, R4	// result = true
    98	RET
    99
   100not_equal:
   101	MOVV	R0, R4	// result = false (R0 is the zero register)
   102	RET
   103
   104	// Implemented using 256-bit SIMD instructions
      	// Only reached from the entry check above, so R6 >= 64 here.
   105lasx:
   106	MOVBU   internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R7	// R7 = HasLASX flag byte
   107	BEQ	R7, lsx	// flag is 0: try the 128-bit path instead
   108
   109lasx256:
   110	MOVV	$256, R7
   111	BLT	R6, R7, lasx64
      	// 256 bytes per iteration: eight 32-byte vectors from each side.
   112lasx256_loop:
   113	ADDV	$-256, R6
   114	XVMOVQ	0(R4), X0
   115	XVMOVQ	32(R4), X1
   116	XVMOVQ	64(R4), X2
   117	XVMOVQ	96(R4), X3
   118	XVMOVQ	128(R4), X4
   119	XVMOVQ	160(R4), X5
   120	XVMOVQ	192(R4), X6
   121	XVMOVQ	224(R4), X7
   122	XVMOVQ	0(R5), X8
   123	XVMOVQ	32(R5), X9
   124	XVMOVQ	64(R5), X10
   125	XVMOVQ	96(R5), X11
   126	XVMOVQ	128(R5), X12
   127	XVMOVQ	160(R5), X13
   128	XVMOVQ	192(R5), X14
   129	XVMOVQ	224(R5), X15
      	// Lane-wise equality of each a/b pair; results overwrite X0-X7.
   130	XVSEQV	X0, X8, X0
   131	XVSEQV	X1, X9, X1
   132	XVSEQV	X2, X10, X2
   133	XVSEQV	X3, X11, X3
   134	XVSEQV	X4, X12, X4
   135	XVSEQV	X5, X13, X5
   136	XVSEQV	X6, X14, X6
   137	XVSEQV	X7, X15, X7
      	// AND-reduce the eight compare results into X0 (8 -> 4 -> 2 -> 1).
   138	XVANDV	X0, X1, X0
   139	XVANDV	X2, X3, X2
   140	XVANDV	X4, X5, X4
   141	XVANDV	X6, X7, X6
   142	XVANDV	X0, X2, X0
   143	XVANDV	X4, X6, X4
   144	XVANDV	X0, X4, X0
   145	XVSETALLNEV	X0, FCC0	// FCC0 = every lane of X0 is non-zero, i.e. every lane matched
   146	BFPF	not_equal	// FCC0 clear: at least one lane differed
   147	BEQ	R6, equal
   148	ADDV	$256, R4
   149	ADDV	$256, R5
   150	BGE	R6, R7, lasx256_loop	// R7 is still 256
   151
   152lasx64:
   153	MOVV	$64, R7
   154	BLT	R6, R7, tail	// fewer than 64 bytes left: finish with the scalar tail
      	// 64 bytes per iteration: two 32-byte vectors from each side.
   155lasx64_loop:
   156	ADDV	$-64, R6
   157	XVMOVQ	0(R4), X0
   158	XVMOVQ	32(R4), X1
   159	XVMOVQ	0(R5), X2
   160	XVMOVQ	32(R5), X3
   161	XVSEQV	X0, X2, X0
   162	XVSEQV	X1, X3, X1
   163	XVANDV	X0, X1, X0
   164	XVSETALLNEV	X0, FCC0
   165	BFPF	not_equal
   166	BEQ	R6, equal
   167	ADDV	$64, R4
   168	ADDV	$64, R5
   169	BGE	R6, R7, lasx64_loop
   170	JMP	tail	// 1..63 bytes left
   171
   172	// Implemented using 128-bit SIMD instructions
      	// Only reached from lasx when the LASX flag is 0, so R6 >= 64 here.
   173lsx:
   174	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R7	// R7 = HasLSX flag byte
   175	BEQ	R7, generic64_loop	// flag is 0: use the scalar 64-byte loop
   176
   177lsx128:
   178	MOVV	$128, R7
   179	BLT	R6, R7, lsx32
      	// 128 bytes per iteration: eight 16-byte vectors from each side.
   180lsx128_loop:
   181	ADDV	$-128, R6
   182	VMOVQ	0(R4), V0
   183	VMOVQ	16(R4), V1
   184	VMOVQ	32(R4), V2
   185	VMOVQ	48(R4), V3
   186	VMOVQ	64(R4), V4
   187	VMOVQ	80(R4), V5
   188	VMOVQ	96(R4), V6
   189	VMOVQ	112(R4), V7
   190	VMOVQ	0(R5), V8
   191	VMOVQ	16(R5), V9
   192	VMOVQ	32(R5), V10
   193	VMOVQ	48(R5), V11
   194	VMOVQ	64(R5), V12
   195	VMOVQ	80(R5), V13
   196	VMOVQ	96(R5), V14
   197	VMOVQ	112(R5), V15
      	// Lane-wise equality of each a/b pair; results overwrite V0-V7.
   198	VSEQV	V0, V8, V0
   199	VSEQV	V1, V9, V1
   200	VSEQV	V2, V10, V2
   201	VSEQV	V3, V11, V3
   202	VSEQV	V4, V12, V4
   203	VSEQV	V5, V13, V5
   204	VSEQV	V6, V14, V6
   205	VSEQV	V7, V15, V7
      	// AND-reduce the eight compare results into V0 (8 -> 4 -> 2 -> 1).
   206	VANDV	V0, V1, V0
   207	VANDV	V2, V3, V2
   208	VANDV	V4, V5, V4
   209	VANDV	V6, V7, V6
   210	VANDV	V0, V2, V0
   211	VANDV	V4, V6, V4
   212	VANDV	V0, V4, V0
   213	VSETALLNEV	V0, FCC0	// FCC0 = every lane of V0 is non-zero, i.e. every lane matched
   214	BFPF	not_equal	// FCC0 clear: at least one lane differed
   215	BEQ	R6, equal
   216
   217	ADDV	$128, R4
   218	ADDV	$128, R5
   219	BGE	R6, R7, lsx128_loop	// R7 is still 128
   220
   221lsx32:
   222	MOVV	$32, R7
   223	BLT	R6, R7, tail	// fewer than 32 bytes left: finish with the scalar tail
      	// 32 bytes per iteration: two 16-byte vectors from each side.
   224lsx32_loop:
   225	ADDV	$-32, R6
   226	VMOVQ	0(R4), V0
   227	VMOVQ	16(R4), V1
   228	VMOVQ	0(R5), V2
   229	VMOVQ	16(R5), V3
   230	VSEQV	V0, V2, V0
   231	VSEQV	V1, V3, V1
   232	VANDV	V0, V1, V0
   233	VSETALLNEV	V0, FCC0
   234	BFPF	not_equal
   235	BEQ	R6, equal
   236	ADDV	$32, R4
   237	ADDV	$32, R5
   238	BGE	R6, R7, lsx32_loop
   239	JMP tail	// 1..31 bytes left
   240
   241	// Implemented using general instructions
      	// Only reached from lsx when the LSX flag is 0, so R6 >= 64 on first entry.
      	// 64 bytes per iteration, compared as two 32-byte halves so that a
      	// mismatch in the first half skips the loads for the second.
      	// NOTE(review): R22 is skipped in the register sequence - presumably it
      	// is reserved (Go's loong64 ABI keeps g there); confirm.
   242generic64_loop:
   243	ADDV	$-64, R6
   244	MOVV	0(R4), R7	// R7 doubles as data scratch here; the 64 constant is reloaded below
   245	MOVV	8(R4), R8
   246	MOVV	16(R4), R9
   247	MOVV	24(R4), R10
   248	MOVV	0(R5), R15
   249	MOVV	8(R5), R16
   250	MOVV	16(R5), R17
   251	MOVV	24(R5), R18
   252	BNE	R7, R15, not_equal
   253	BNE	R8, R16, not_equal
   254	BNE	R9, R17, not_equal
   255	BNE	R10, R18, not_equal
   256	MOVV	32(R4), R11
   257	MOVV	40(R4), R12
   258	MOVV	48(R4), R13
   259	MOVV	56(R4), R14
   260	MOVV	32(R5), R19
   261	MOVV	40(R5), R20
   262	MOVV	48(R5), R21
   263	MOVV	56(R5), R23
   264	BNE	R11, R19, not_equal
   265	BNE	R12, R20, not_equal
   266	BNE	R13, R21, not_equal
   267	BNE	R14, R23, not_equal
   268	BEQ	R6, equal
   269	ADDV	$64, R4
   270	ADDV	$64, R5
   271	MOVV	$64, R7	// restore the chunk size clobbered by the load into R7 above
   272	BGE	R6, R7, generic64_loop
   273	JMP tail	// 1..63 bytes left

View as plain text