...
Run Format

Text file src/internal/runtime/gc/scan/filter_amd64.s

Documentation: internal/runtime/gc/scan

     1// Copyright 2025 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     // FilterNilAVX512 compacts, in place and in order, the non-zero 64-bit
     // words of a buffer to its front and returns how many it kept.
     //
     // Frame ($0-20, no locals):
     //	bufp+0(FP)	8-byte pointer to the first word of the buffer
     //	n+8(FP)		32-bit word count, compared as a signed value
     //	ret+16(FP)	32-bit result: number of non-zero words kept
     //
     // Register roles:
     //	SI = buffer base	CX = n
     //	AX = read index (words consumed so far)
     //	DX = write index (words kept so far), never ahead of AX
     //	BX = scratch	Z0 = all zeroes	Z1 = current 8-word group
     //
     // Clobbers AX, BX, CX, DX, SI, Z0, Z1, K1 and flags.
     // NOTE(review): nothing here checks CPU features; presumably the caller
     // only selects this routine when AVX-512 and POPCNT are available.
     TEXT ·FilterNilAVX512(SB), NOSPLIT, $0-20
     	MOVQ bufp+0(FP), SI
     	MOVL n+8(FP), CX
     	XORL AX, AX		// nothing read yet
     	XORL DX, DX		// nothing kept yet

     	// Nothing has been consumed, so the remaining count is n itself.
     	CMPL CX, $8
     	JLT tail_check		// fewer than 8 words: scalar path only
     	VPXOR X0, X0, X0	// VEX encoding clears the whole of Z0

     vec8:
     	// Invariant: at least 8 unread words remain at buf[AX:].
     	VMOVDQU64 (SI)(AX*8), Z1	// load 8 words
     	VPCMPUQ $4, Z1, Z0, K1		// predicate 4 = not-equal: K1 marks non-zero lanes

     	// Pack the marked lanes to the low end and store all 64 bytes at
     	// buf[DX:]. Only the first popcount(K1) words of that store are
     	// meaningful; the rest land on words that were already loaded
     	// (DX <= AX), so no unread input is overwritten.
     	VPCOMPRESSQ Z1, K1, Z1
     	VMOVDQU64 Z1, (SI)(DX*8)

     	KMOVW K1, BX
     	POPCNTL BX, BX		// number of words kept from this group
     	ADDL BX, DX
     	ADDL $8, AX

     	MOVL CX, BX
     	SUBL AX, BX		// BX = words still unread
     	CMPL BX, $8
     	JGE vec8

     tail_check:
     	CMPL AX, CX
     	JGE done		// no leftover words (also covers n <= 0)

     tail_word:
     	MOVQ (SI)(AX*8), BX	// BX = buf[AX]
     	TESTQ BX, BX
     	JEQ tail_step		// zero word: drop it
     	MOVQ BX, (SI)(DX*8)	// keep it at buf[DX]
     	INCL DX

     tail_step:
     	INCL AX
     	CMPL AX, CX
     	JLT tail_word

     done:
     	MOVL DX, ret+16(FP)
     	VZEROUPPER		// executed on both paths; clears upper vector state before returning
     	RET

View as plain text