// src/runtime/race_riscv64.s (package runtime)
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the riscv C calling convention.
// Arguments are passed in X10...X17
// Callee-saved registers are: X8, X9, X18..X27
// Temporary registers are: X5..X7, X28..X31

// When calling racecalladdr, X11 is the call target address.

// The race ctx, ThreadState *thr below, is passed in X10 and loaded in racecalladdr.
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOV	$__tsan_read(SB), X5
	MOV	X10, X11	// addr (ABIInternal arg 0)
	MOV	X1, X12		// caller pc (RA)
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOV	$__tsan_read_pc(SB), X5
	MOV	addr+0(FP), X11
	MOV	callpc+8(FP), X12
	MOV	pc+16(FP), X13
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOV	$__tsan_write(SB), X5
	MOV	X10, X11	// addr (ABIInternal arg 0)
	MOV	X1, X12		// caller pc (RA)
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOV	$__tsan_write_pc(SB), X5
	MOV	addr+0(FP), X11
	MOV	callpc+8(FP), X12
	MOV	pc+16(FP), X13
	JMP	racecalladdr<>(SB)
71
// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOV	$__tsan_read_range(SB), X5
	MOV	X11, X12	// size (ABIInternal arg 1)
	MOV	X10, X11	// addr (ABIInternal arg 0)
	MOV	X1, X13		// caller pc (RA)
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOV	$__tsan_read_range(SB), X5
	MOV	addr+0(FP), X11
	MOV	size+8(FP), X12
	MOV	pc+16(FP), X13

	// pc is an interceptor address, but TSan expects it to point to the
	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
	ADD	$4, X13
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOV	$__tsan_write_range(SB), X5
	MOV	X11, X12	// size (ABIInternal arg 1)
	MOV	X10, X11	// addr (ABIInternal arg 0)
	MOV	X1, X13		// caller pc (RA)
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOV	$__tsan_write_range(SB), X5
	MOV	addr+0(FP), X11
	MOV	size+8(FP), X12
	MOV	pc+16(FP), X13
	// pc is an interceptor address, but TSan expects it to point to the
	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
	ADD	$4, X13
	JMP	racecalladdr<>(SB)
126
// If addr (X11) is out of range, do nothing. Otherwise, setup goroutine context and
// invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOV	runtime·racearenastart(SB), X7
	BLT	X11, X7, data	// Before racearena start?
	MOV	runtime·racearenaend(SB), X7
	BLT	X11, X7, call	// Before racearena end?
data:
	MOV	runtime·racedatastart(SB), X7
	BLT	X11, X7, ret	// Before racedata start?
	MOV	runtime·racedataend(SB), X7
	BGE	X11, X7, ret	// At or after racedata end?
call:
	MOV	g_racectx(g), X10	// first C arg: ThreadState *thr
	JMP	racecall<>(SB)
ret:
	RET
144
// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOV	$__tsan_func_enter(SB), X5
	MOV	X10, X11	// pc (ABIInternal arg 0)
	MOV	g_racectx(g), X10
	JMP	racecall<>(SB)

// Common code for racefuncenter
// X1 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOV	$__tsan_func_enter(SB), X5
	MOV	g_racectx(g), X10
	MOV	X1, X11
	JMP	racecall<>(SB)

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	// void __tsan_func_exit(ThreadState *thr);
	MOV	$__tsan_func_exit(SB), X5
	MOV	g_racectx(g), X10
	JMP	racecall<>(SB)
169
// Atomic operations for sync/atomic package.

// Load

TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOV	$__tsan_go_atomic32_load(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOV	$__tsan_go_atomic64_load(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store

TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOV	$__tsan_go_atomic32_store(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOV	$__tsan_go_atomic64_store(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap

TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOV	$__tsan_go_atomic32_exchange(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOV	$__tsan_go_atomic64_exchange(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)
253
// Add

TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOV	$__tsan_go_atomic32_fetch_add(SB), X5
	CALL	racecallatomic<>(SB)
	// TSan performed fetch_add, but Go needs add_fetch.
	MOVW	add+8(FP), X5
	MOVW	ret+16(FP), X6
	ADD	X5, X6, X5
	MOVW	X5, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOV	$__tsan_go_atomic64_fetch_add(SB), X5
	CALL	racecallatomic<>(SB)
	// TSan performed fetch_add, but Go needs add_fetch.
	MOV	add+8(FP), X5
	MOV	ret+16(FP), X6
	ADD	X5, X6, X5
	MOV	X5, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOV	$__tsan_go_atomic32_fetch_and(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOV	$__tsan_go_atomic64_fetch_and(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOV	$__tsan_go_atomic32_fetch_or(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOV	$__tsan_go_atomic64_fetch_or(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)
339
// CompareAndSwap

TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOV	$__tsan_go_atomic32_compare_exchange(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOV	$__tsan_go_atomic64_compare_exchange(SB), X5
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)
365
// Generic atomic operation implementation.
// X5 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers
	// X10 = *ThreadState
	// X11 = caller pc
	// X12 = pc
	// X13 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOV	24(X2), X6	// 1st arg is addr. after two times CALL, get it at 24(X2)
	MOVB	(X6), X0	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOV	runtime·racearenastart(SB), X7
	BLT	X6, X7, racecallatomic_data
	MOV	runtime·racearenaend(SB), X7
	BLT	X6, X7, racecallatomic_ok
racecallatomic_data:
	MOV	runtime·racedatastart(SB), X7
	BLT	X6, X7, racecallatomic_ignore
	MOV	runtime·racedataend(SB), X7
	BGE	X6, X7, racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOV	g_racectx(g), X10	// goroutine context
	MOV	8(X2), X11	// caller pc
	MOV	X1, X12	// pc
	ADD	$24, X2, X13	// arguments
	CALL	racecall<>(SB)
	RET
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause crash.
	MOV	X1, X20	// save PC
	MOV	X5, X21	// save target function
	MOV	$__tsan_go_ignore_sync_begin(SB), X5
	MOV	g_racectx(g), X10	// goroutine context
	CALL	racecall<>(SB)
	MOV	X21, X5	// restore the target function
	// Call the atomic function.
	MOV	g_racectx(g), X10	// goroutine context
	MOV	8(X2), X11	// caller pc
	MOV	X20, X12	// pc
	ADD	$24, X2, X13	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOV	$__tsan_go_ignore_sync_end(SB), X5
	MOV	g_racectx(g), X10	// goroutine context
	CALL	racecall<>(SB)
	RET
417
// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there
// are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOV	fn+0(FP), X5
	MOV	arg0+8(FP), X10
	MOV	arg1+16(FP), X11
	MOV	arg2+24(FP), X12
	MOV	arg3+32(FP), X13
	JMP	racecall<>(SB)
429
// Switches SP to g0 stack and calls X5. Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOV	X1, X18	// Save RA in callee save register
	MOV	X2, X19	// Save SP in callee save register
	CALL	runtime·save_g(SB)	// Save g for callbacks

	MOV	g_m(g), X6

	// Switch to g0 stack if we aren't already on g0 or gsignal.
	MOV	m_gsignal(X6), X7
	BEQ	X7, g, call
	MOV	m_g0(X6), X7
	BEQ	X7, g, call

	MOV	(g_sched+gobuf_sp)(X7), X2	// Switch to g0 stack
call:
	JALR	RA, (X5)	// Call C function
	MOV	X19, X2	// Restore SP
	JMP	(X18)	// Return to Go.
449
// C->Go callback thunk that allows to call runtime·racesymbolize from C code.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
// X10 contains command code. X11 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	BNEZ	X10, rest
	MOV	X1, X5	// save RA
	MOV	g, X6	// save g
	CALL	runtime·load_g(SB)
	MOV	g_m(g), X7
	MOV	m_p(X7), X7
	MOV	p_raceprocctx(X7), X7
	MOV	X7, (X11)	// *ctx = p->raceprocctx
	MOV	X6, g	// restore g
	JMP	(X5)
rest:
	// Save callee-save registers (X8, X9, X18..X27, F8, F9, F18..F27),
	// since Go code will not respect this.
	// 8(X2) and 16(X2) are for args passed to racecallback
	SUB	$(27*8), X2
	MOV	X1, (0*8)(X2)
	MOV	X8, (3*8)(X2)
	MOV	X9, (4*8)(X2)
	MOV	X18, (5*8)(X2)
	MOV	X19, (6*8)(X2)
	MOV	X20, (7*8)(X2)
	MOV	X21, (8*8)(X2)
	MOV	X22, (9*8)(X2)
	MOV	X23, (10*8)(X2)
	MOV	X24, (11*8)(X2)
	MOV	X25, (12*8)(X2)
	MOV	X26, (13*8)(X2)
	MOV	g, (14*8)(X2)
	MOVD	F8, (15*8)(X2)
	MOVD	F9, (16*8)(X2)
	MOVD	F18, (17*8)(X2)
	MOVD	F19, (18*8)(X2)
	MOVD	F20, (19*8)(X2)
	MOVD	F21, (20*8)(X2)
	MOVD	F22, (21*8)(X2)
	MOVD	F23, (22*8)(X2)
	MOVD	F24, (23*8)(X2)
	MOVD	F25, (24*8)(X2)
	MOVD	F26, (25*8)(X2)
	MOVD	F27, (26*8)(X2)

	// Set g = g0.
	CALL	runtime·load_g(SB)
	MOV	g_m(g), X5
	MOV	m_g0(X5), X6
	BEQ	X6, g, noswitch	// branch if already on g0
	MOV	X6, g

	MOV	X10, 8(X2)	// func arg
	MOV	X11, 16(X2)	// func arg
	CALL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOV	g_m(g), X5
	MOV	m_curg(X5), g	// g = m->curg
ret:
	// Restore callee-save registers.
	MOV	(0*8)(X2), X1
	MOV	(3*8)(X2), X8
	MOV	(4*8)(X2), X9
	MOV	(5*8)(X2), X18
	MOV	(6*8)(X2), X19
	MOV	(7*8)(X2), X20
	MOV	(8*8)(X2), X21
	MOV	(9*8)(X2), X22
	MOV	(10*8)(X2), X23
	MOV	(11*8)(X2), X24
	MOV	(12*8)(X2), X25
	MOV	(13*8)(X2), X26
	MOV	(14*8)(X2), g
	MOVD	(15*8)(X2), F8
	MOVD	(16*8)(X2), F9
	MOVD	(17*8)(X2), F18
	MOVD	(18*8)(X2), F19
	MOVD	(19*8)(X2), F20
	MOVD	(20*8)(X2), F21
	MOVD	(21*8)(X2), F22
	MOVD	(22*8)(X2), F23
	MOVD	(23*8)(X2), F24
	MOVD	(24*8)(X2), F25
	MOVD	(25*8)(X2), F26
	MOVD	(26*8)(X2), F27

	ADD	$(27*8), X2
	JMP	(X1)

noswitch:
	// already on g0
	MOV	X10, 8(X2)	// func arg
	MOV	X11, 16(X2)	// func arg
	CALL	runtime·racecallback(SB)
	JMP	ret
// (end of file)