blob: 56fc9e35991186f0132e85dce81c4b0a35723666 [file] [log] [blame]
// Copyright 2023 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <lib/arch/asm.h>
// In both the static and dynamic cases, a0 points to the descriptor's slots in
// the GOT. Like other machines, the RISC-V psABI defines a two-slot layout for
// TLSDESC resolution. In the static case, the second slot holds the symbol's
// offset from tp.
//
// In the dynamic case, the second slot has to be a pointer to the pair of
// module ID and offset.
//
// In both cases, the return address is in t0 rather than ra. No registers
// other than a0 and t0 may be clobbered.
// Static TLS resolver: the descriptor's second slot already holds the
// symbol's offset from tp, so that value is the result.
//
//   In:  a0 = pointer to the two-slot descriptor
//        t0 = return address
//   Out: a0 = offset from tp
.function __tlsdesc_static, global
  ld a0, 1*8(a0)  // Slot 1: the tp offset.
  jr t0           // Return via t0, not ra.
.end_function
// For dynamic TLS, the offset stored is the offset within the module's
// PT_TLS block and the module ID is stored too.
//
//   In:  a0 = pointer to the two-slot descriptor; its second slot points to
//             the {module ID, offset} pair
//        t0 = return address
//   Out: a0 = address of the symbol minus tp
//
// Every register other than a0 and t0 is preserved, including across the
// slow-path call to __tls_get_new.
.function __tlsdesc_dynamic, global
  // All the normally call-clobbered registers are actually preserved.
  .cfi_same_value ra
  .cfi_same_value a0
  .cfi_same_value a1
  .cfi_same_value a2
  .cfi_same_value a3
  .cfi_same_value a4
  .cfi_same_value a5
  .cfi_same_value a6
  .cfi_same_value a7
  .cfi_same_value t0
  .cfi_same_value t1
  .cfi_same_value t2
  .cfi_same_value t3
  .cfi_same_value t4
  .cfi_same_value t5
  .cfi_same_value t6

  // Take 4 slots to keep 16-byte alignment, though we only need 3 slots:
  // a1..a3 live in slots 1..3 and slot 0 stays unused.
  add sp, sp, -4*8
  .cfi_adjust_cfa_offset 4*8
  sd a1, 1*8(sp)
  .cfi_rel_offset a1, 1*8
  sd a2, 2*8(sp)
  .cfi_rel_offset a2, 2*8
  sd a3, 3*8(sp)  // Must be slot 3: slot 2 holds a2 and .Lret reloads 3*8.
  .cfi_rel_offset a3, 3*8

  // size_t __tlsdesc_dynamic(size_t *a)
  // {
  //   struct {size_t modidx,off;} *p = (void*)a[1];
  //   size_t *dtv = *(size_t**)(tp - 24);
  //   if (p->modidx <= dtv[0])
  //     return dtv[p->modidx] + p->off - tp;
  //   return __tls_get_new(p->off, p->modidx) - tp;
  // }
  ld a1, 8(a0)    // Pointer to struct: p
  ld a0, 8(a1)    // Offset: p->offset
  ld a1, (a1)     // module ID: p->modidx
  ld a2, -24(tp)  // DTV
  ld a3, (a2)     // DTV[0], generation ID
  bgt a1, a3, .Lneed_new_dtv
  .cfi_remember_state

  slli a1, a1, 3  // Scale module ID to words.
  add a1, a1, a2  // &DTV[ID]
  ld a1, (a1)     // DTV[ID]
  add a0, a0, a1  // DTV[ID] + offset

.Lret:
  // Both paths arrive here with sp at the 4-slot frame set up on entry.
  sub a0, a0, tp  // TLS block pointer - tp
  ld a1, 1*8(sp)
  .cfi_same_value a1
  ld a2, 2*8(sp)
  .cfi_same_value a2
  ld a3, 3*8(sp)
  .cfi_same_value a3
  add sp, sp, 4*8
  .cfi_adjust_cfa_offset -4*8
  jr t0

.Lneed_new_dtv:
  // a0 is the offset and a1 is the module ID.
  // Those are the arguments to __tls_get_new.
  .cfi_restore_state

  // a1..a3 were already saved at -1*8..-3*8 from the CFA, when sp was
  // adjusted down to be -4*8 from the CFA itself.  On the slow path sp is
  // adjusted by a further 32 slots, so it's -36*8 from the CFA, with the
  // a1..a3 slots where they already were and 32 more slots for the remaining
  // saved registers.  So CFA - 36*8 matches the sp, and the \idx is how many
  // slots above that \reg is stored.
  .macro spill reg, idx, f=
    \f\()sd \reg, \idx*8(sp)
    .cfi_rel_offset \reg, \idx * 8
  .endm

  .macro reload reg, idx, f=
    \f\()ld \reg, \idx*8(sp)
    .cfi_same_value \reg
  .endm

  .macro on_saved_regs op
    \op ra, 0
    \op a4, 1
    \op a5, 2
    \op a6, 3
    \op a7, 4
    \op t2, 5
    \op t3, 6
    \op t4, 7
    \op t5, 8
    \op t6, 9
    // We could probably reasonably assume that __tls_get_new won't
    // clobber any F/D registers, but we don't.  Note however that the
    // LP64D calling convention allows it to clobber the high halves
    // when they're actually Q registers.  We're not preserving those
    // since we can't presume the Q instructions are available.
    \op ft0, 10, f
    \op ft1, 11, f
    \op ft2, 12, f
    \op ft3, 13, f
    \op ft4, 14, f
    \op ft5, 15, f
    \op ft6, 16, f
    \op ft7, 17, f
    \op ft8, 18, f
    \op ft9, 19, f
    \op ft10, 20, f
    \op ft11, 21, f
    \op fa0, 22, f
    \op fa1, 23, f
    \op fa2, 24, f
    \op fa3, 25, f
    \op fa4, 26, f
    \op fa5, 27, f
    \op fa6, 28, f
    \op fa7, 29, f
    \op t0, 30
    // t1 is call-clobbered in the normal calling convention, so the callee
    // may change it; only a0 and t0 may be clobbered here.
    \op t1, 31
  .endm

  // We need 32 slots here (indices 0..31 above), which also keeps sp
  // 16-byte aligned for the call.
  add sp, sp, -32*8
  .cfi_adjust_cfa_offset 32*8
  on_saved_regs spill

  call __tls_get_new

  on_saved_regs reload
  // Release exactly what was allocated above so that .Lret finds a1..a3 at
  // 1*8..3*8 from sp again.
  add sp, sp, 32*8
  .cfi_adjust_cfa_offset -32*8
  j .Lret
.end_function