Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,22 @@ jobs:
system-processor: riscv64
triple: riscv64-linux-gnu
rtld: ld-linux-riscv64-lp64d.so.1
- name: loongarch64
system-processor: loongarch64
triple: loongarch64-linux-gnu
rtld: ld-linux-loongarch-lp64d.so.1
- name: ppc64el
system-processor: powerpc64le
triple: powerpc64le-linux-gnu
rtld: ld64.so.2
exclude:
# lld versions prior to 18 do not support linking LoongArch in the Ubuntu provided packages.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Linker relaxation related? Might be good to clarify for the record.

- llvm-version: 16
arch:
name: loongarch64
- llvm-version: 17
arch:
name: loongarch64
# FIXME(hugo): Hangs while executing tests.
- llvm-version: 18
arch:
Expand All @@ -118,7 +129,12 @@ jobs:
- name: Install cross-compile toolchain and QEMU
run: |
sudo apt update
sudo apt install libstdc++-9-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build
if [ "${{ matrix.arch.name }}" = "loongarch64" ]; then
# No libstdc++-9 package exists for LoongArch on Ubuntu, so use version 13.
sudo apt install libstdc++-13-dev-loong64-cross qemu-user ninja-build
else
sudo apt install libstdc++-9-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build
fi
- name: Configure CMake
run: |
export LDFLAGS="-L/usr/lib/llvm-${{ matrix.llvm-version }}/lib/ -fuse-ld=lld-${{ matrix.llvm-version}} -Wl,--dynamic-linker=/usr/${{ matrix.arch.triple }}/lib/${{ matrix.arch.rtld }},-rpath,/usr/${{ matrix.arch.triple }}/lib"
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ if (WIN32 AND NOT MINGW)
COMMAND echo ${ASSEMBLER} ${ASM_TARGET} -c "${CMAKE_SOURCE_DIR}/objc_msgSend.S" -o "${CMAKE_BINARY_DIR}/objc_msgSend.obj"
COMMAND ${ASSEMBLER} ${ASM_TARGET} -c "${CMAKE_SOURCE_DIR}/objc_msgSend.S" -o "${CMAKE_BINARY_DIR}/objc_msgSend.obj"
MAIN_DEPENDENCY objc_msgSend.S
DEPENDS objc_msgSend.aarch64.S objc_msgSend.arm.S objc_msgSend.mips.S objc_msgSend.x86-32.S objc_msgSend.x86-64.S
DEPENDS objc_msgSend.aarch64.S objc_msgSend.arm.S objc_msgSend.loongarch64.S objc_msgSend.mips.S objc_msgSend.x86-32.S objc_msgSend.x86-64.S
)
set(libobjc_ASM_OBJS block_trampolines.obj objc_msgSend.obj)
endif()
Expand Down
38 changes: 33 additions & 5 deletions block_to_imp.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ struct block_header
void *block;
void(*fnptr)(void);
/*
* On 64-bit platforms, we have 16 bytes for instructions, which ought to
* be enough without padding.
* On most 64-bit platforms, we have 16 bytes for instructions, which ought
* to be enough without padding.
* Note: If we add too much padding, then we waste space but have no other
* ill effects. If we get this too small, then the assert in
* `init_trampolines` will fire on library load.
Expand All @@ -129,10 +129,13 @@ struct block_header
* PAGE_SIZE, so we need to pad block_header to 32 bytes.
* On PowerPC 64-bit where sizeof(void *) = 8 bytes, we
* add 16 bytes of padding.
*
* LoongArch64 needs five 4-byte instructions, so it also requires a
Comment thread
basilisk-dev marked this conversation as resolved.
* 32-byte block_header.
*/
#if defined(__i386__) || (defined(__mips__) && !defined(__mips_n64)) || (defined(__powerpc__) && !defined(__powerpc64__))
uint64_t padding[3];
#elif defined(__mips__) || defined(__ARM_ARCH_ISA_A64) || defined(__powerpc64__)
#elif defined(__mips__) || defined(__ARM_ARCH_ISA_A64) || defined(__powerpc64__) || (defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto above regarding arch macro.

uint64_t padding[2];
#elif defined(__arm__)
uint64_t padding;
Expand Down Expand Up @@ -195,6 +198,15 @@ extern char __objc_block_trampoline_16;
extern char __objc_block_trampoline_end_16;
extern char __objc_block_trampoline_sret_16;
extern char __objc_block_trampoline_end_sret_16;
#elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto regarding arch macro.

extern char __objc_block_trampoline_16;
extern char __objc_block_trampoline_end_16;
extern char __objc_block_trampoline_sret_16;
extern char __objc_block_trampoline_end_sret_16;
extern char __objc_block_trampoline_64;
extern char __objc_block_trampoline_end_64;
extern char __objc_block_trampoline_sret_64;
extern char __objc_block_trampoline_end_sret_64;
#endif

// Cache the correct trampoline region
Expand Down Expand Up @@ -223,12 +235,16 @@ PRIVATE void init_trampolines(void)
// Check that sizeof(struct block_header) is a divisor of the current page size
assert(trampoline_header_per_page * sizeof(struct block_header) == trampoline_page_size);

// Check that assumptions for all non-variable page size implementations
// (currently everything except AArch64) are met
// Check that assumptions for all non-variable page size implementations
// (currently everything except AArch64 and LoongArch64) are met
#if defined(__powerpc64__)
assert(trampoline_page_size == 0x10000);
#elif defined(__ARM_ARCH_ISA_A64)
assert(trampoline_page_size == 0x1000 || trampoline_page_size == 0x4000);
#elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)
assert(trampoline_page_size == 0x1000 ||
trampoline_page_size == 0x4000 ||
trampoline_page_size == 0x10000);
#else
assert(trampoline_page_size == 0x1000);
#endif
Expand All @@ -241,6 +257,18 @@ PRIVATE void init_trampolines(void)
trampoline_start_sret = &__objc_block_trampoline_sret_16;
trampoline_end_sret = &__objc_block_trampoline_end_sret_16;
} else {
#elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

if (trampoline_page_size == 0x4000) {
trampoline_start = &__objc_block_trampoline_16;
trampoline_end = &__objc_block_trampoline_end_16;
trampoline_start_sret = &__objc_block_trampoline_sret_16;
trampoline_end_sret = &__objc_block_trampoline_end_sret_16;
} else if (trampoline_page_size == 0x10000) {
trampoline_start = &__objc_block_trampoline_64;
trampoline_end = &__objc_block_trampoline_end_64;
trampoline_start_sret = &__objc_block_trampoline_sret_64;
trampoline_end_sret = &__objc_block_trampoline_end_sret_64;
} else {
#else
{
#endif
Expand Down
47 changes: 46 additions & 1 deletion block_trampolines.S
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,37 @@
#define SARG0 ARG1
#define SARG1 ARG2

#elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

////////////////////////////////////////////////////////////////////////////////
// LoongArch64 trampoline
////////////////////////////////////////////////////////////////////////////////
.macro trampoline arg0, arg1
pcaddi $t0, -1024
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The LoongArch ABI doesn’t do the MIPS trick of requiring a specific register for jalr so you know the PC on entry?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it doesn't; the MIPS approach is mostly a workaround for lacking instructions like pcaddi.

move \arg1, \arg0
ld.d \arg0, $t0, 0
ld.d $t0, $t0, 8
jr $t0
.endm
// Trampoline body used for 16 KiB pages.
//
// \arg0 is the register holding the first argument on entry and \arg1 is the
// register the original first argument is moved into.
//
// NOTE(review): per the LoongArch reference manual, pcaddi adds (imm << 2) to
// the PC, so -4096 addresses the location 16 KiB (one page) before this
// instruction. Presumably that is the data record paired with this trampoline
// on the preceding page -- confirm against init_trampolines.
.macro trampoline_16 arg0, arg1
pcaddi $t0, -4096
// Shift the incoming first argument into the next argument register.
move \arg1, \arg0
// New first argument: the pointer stored at offset 0 of the record
// (presumably block_header.block -- TODO confirm).
ld.d \arg0, $t0, 0
// Jump target: the pointer stored at offset 8 of the record
// (presumably block_header.fnptr -- TODO confirm).
ld.d $t0, $t0, 8
// Tail-jump so the callee returns directly to our caller ($ra is untouched).
jr $t0
.endm
.macro trampoline_64 arg0, arg1
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These trampolines appear to differ only by the displacement from the start, can’t that be just another macro argument?

pcaddi $t0, -16384
move \arg1, \arg0
ld.d \arg0, $t0, 0
ld.d $t0, $t0, 8
jr $t0
.endm
#define ARG0 $a0
#define ARG1 $a1
#define ARG2 $a2
#define SARG0 ARG1
#define SARG1 ARG2

#elif defined(__ARM_ARCH_ISA_A64)
////////////////////////////////////////////////////////////////////////////////
// AArch64 (ARM64) trampoline
Expand Down Expand Up @@ -234,7 +265,7 @@ CDECL(__objc_block_trampoline_sret):
CDECL(__objc_block_trampoline_end_sret):

// Trampoline for 16 KiB page sizes
#if defined(__ARM_ARCH_ISA_A64)
#if defined(__ARM_ARCH_ISA_A64) || (defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

.globl CDECL(__objc_block_trampoline_16)
CDECL(__objc_block_trampoline_16):
trampoline_16 ARG0, ARG1
Expand All @@ -247,6 +278,20 @@ CDECL(__objc_block_trampoline_sret_16):
CDECL(__objc_block_trampoline_end_sret_16):
#endif

// Trampoline for 64 KiB page sizes
#if defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

.globl CDECL(__objc_block_trampoline_64)
CDECL(__objc_block_trampoline_64):
trampoline_64 ARG0, ARG1
.globl CDECL(__objc_block_trampoline_end_64)
CDECL(__objc_block_trampoline_end_64):
.globl CDECL(__objc_block_trampoline_sret_64)
CDECL(__objc_block_trampoline_sret_64):
trampoline_64 SARG0, SARG1
.globl CDECL(__objc_block_trampoline_end_sret_64)
CDECL(__objc_block_trampoline_end_sret_64):
#endif

#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif
2 changes: 2 additions & 0 deletions objc/message.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#if defined(__x86_64) || defined(__i386) || defined(__arm__) || \
defined(__mips_n64) || defined(__mips_n32) || \
defined(__ARM_ARCH_ISA_A64) || \
(defined(__loongarch__) && defined(__loongarch_lp64) && \
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__loongarch_lp64 should suffice on its own, cc @xen0n @xry111 right?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__loongarch__ should be redundant, but I think we should also guard on __loongarch_double_float, since floating-point registers are spilled/restored in the code as well.

defined(__loongarch_double_float)) || \
(defined(__riscv) && __riscv_xlen == 64 && \
defined(__riscv_float_abi_double))

Expand Down
2 changes: 2 additions & 0 deletions objc_msgSend.S
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "objc_msgSend.aarch64.S"
#elif defined(__riscv) && (__riscv_xlen == 64) && defined(__riscv_float_abi_double)
#include "objc_msgSend.riscv64.S"
#elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

#include "objc_msgSend.loongarch64.S"
#elif defined(__mips_n64) || defined(__mips_n32)
#include "objc_msgSend.mips.S"
#else
Expand Down
135 changes: 135 additions & 0 deletions objc_msgSend.loongarch64.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Size in bytes of the register save area used by the MSGSEND slow path:
// ten 8-byte integer slots (a0-a7, fp, ra) followed by eight 8-byte
// floating-point slots (fa0-fa7), i.e. 144 bytes.
#define ARGUMENT_SPILL_SIZE (10*8 + 8*8)

// MSGSEND receiver, sel
//
// Emits the body of an objc_msgSend-style entry point for LoongArch64.
//   \receiver - register holding the receiver object pointer
//   \sel      - register holding the selector pointer
//
// Paths through the macro:
//   * nil receiver (label 3): zero \receiver, \sel, fa0 and fa1, then return.
//   * fast path (labels 0/1/2): walk the class's dispatch table using the
//     selector index and tail-call the method found in the slot.
//   * slow path (label 4): the slot was nil, so spill all argument registers,
//     call slowMsgLookup, restore the arguments and tail-call the returned IMP.
//   * small object (label 5): the receiver has SMALLOBJ_MASK bits set, so its
//     class is fetched from the SmallObjectClasses table instead of from the
//     object itself.
//
// Scratch registers used: t0-t3. Every path ends in jr/ret/b, so control never
// falls out of the bottom of the macro.
.macro MSGSEND receiver, sel
.cfi_startproc
beqz \receiver, 3f // Skip everything if receiver is nil

// Non-zero bits under SMALLOBJ_MASK mean the receiver is not a plain object
// pointer; its class comes from a table lookup at label 5.
andi $t0, \receiver, SMALLOBJ_MASK
bnez $t0, 5f

ld.d $t0, \receiver, 0 // Load class into t0
// Label 0: class pointer is in t0 (also the re-entry point for label 5).
0:
ld.d $t0, $t0, DTABLE_OFFSET // dtable -> t0
ld.d $t1, \sel, 0 // selector->index -> t1
ld.w $t2, $t0, SHIFT_OFFSET // dtable->shift -> t2

// Pick how many table levels to walk: shift == 8 enters at label 1 (two
// levels), shift == 0 enters at label 2 (one level), anything else falls
// through and walks all three levels.
addi.d $t3, $zero, 8
beq $t2, $t3, 1f
beqz $t2, 2f

// Each level below isolates one byte of the selector index, already scaled by
// 8 (the mask 0x7f8 is 0xff << 3), adds it to the current table pointer and
// loads the next-level pointer at DATA_OFFSET.
srli.d $t2, $t1, 13 // Extract byte 3 of sel index and multiply by 2^3
andi $t2, $t2, 0x7f8
add.d $t2, $t0, $t2
ld.d $t0, $t2, DATA_OFFSET
1:
srli.d $t2, $t1, 5 // Extract byte 2 of sel index and multiply by 2^3
andi $t2, $t2, 0x7f8
add.d $t2, $t0, $t2
ld.d $t0, $t2, DATA_OFFSET
2:
slli.d $t2, $t1, 3 // Multiply by 2^3
andi $t2, $t2, 0x7f8
add.d $t2, $t0, $t2
ld.d $t0, $t2, DATA_OFFSET // Slot pointer is now in t0

beqz $t0, 4f // If the slot is nil, go to the C path

ld.d $t0, $t0, SLOT_OFFSET // Load the method from the slot
jr $t0 // Tail-call the method

// Label 3: nil receiver. Clear the receiver/selector registers and the first
// two FP argument registers, then return to the caller.
// NOTE(review): presumably this makes integer and floating-point results of a
// message to nil read as zero -- confirm against the other architectures.
3:
move \receiver, $zero
move \sel, $zero
movgr2fr.d $fa0, $zero
movgr2fr.d $fa1, $zero
ret

// Label 4: slow path. Save area layout after the adjustment below (offsets
// from sp): a0-a7 at 0..56, fa0-fa7 at 64..120, fp at 128, ra at 136.
4:
addi.d $sp, $sp, -ARGUMENT_SPILL_SIZE

// Spill function arguments.
st.d $a0, $sp, 0
st.d $a1, $sp, 8
st.d $a2, $sp, 16
st.d $a3, $sp, 24
st.d $a4, $sp, 32
st.d $a5, $sp, 40
st.d $a6, $sp, 48
st.d $a7, $sp, 56

// Spill FP arguments.
fst.d $fa0, $sp, 64
fst.d $fa1, $sp, 72
fst.d $fa2, $sp, 80
fst.d $fa3, $sp, 88
fst.d $fa4, $sp, 96
fst.d $fa5, $sp, 104
fst.d $fa6, $sp, 112
fst.d $fa7, $sp, 120

// Save the caller's frame pointer and return address.
st.d $fp, $sp, 128
st.d $ra, $sp, 136

// fp now points at the saved-fp slot, i.e. 16 bytes below the sp we had on
// entry. A further 16 bytes are pushed to hold the receiver, so every save-area
// offset used after this point is 16 larger than in the layout above.
addi.d $fp, $sp, 128
addi.d $sp, $sp, -16
st.d \receiver, $sp, 0 // Keep &self at sp for slowMsgLookup

// Use explicit DWARF register numbers for compatibility with older Clang IAS.
// Register 22 is fp and register 1 is ra: CFA = fp + 16 (the entry sp), with
// fp saved at CFA-16 and ra at CFA-8, matching the two stores above.
.cfi_def_cfa 22, 16
.cfi_offset 22, -16
.cfi_offset 1, -8

move $a0, $sp // &self in first argument
move $a1, \sel
// pcaddu18i + jirl form a single call sequence covered by the %call36
// relocation; keep the two instructions adjacent.
pcaddu18i $ra, %call36(CDECL(slowMsgLookup))
jirl $ra, $ra, 0

move $t0, $a0 // IMP -> t0

// Restore the integer arguments (save-area offsets + 16).
ld.d $a0, $sp, 16
ld.d $a1, $sp, 24
ld.d $a2, $sp, 32
ld.d $a3, $sp, 40
ld.d $a4, $sp, 48
ld.d $a5, $sp, 56
ld.d $a6, $sp, 64
ld.d $a7, $sp, 72

// Restore the FP arguments.
fld.d $fa0, $sp, 80
fld.d $fa1, $sp, 88
fld.d $fa2, $sp, 96
fld.d $fa3, $sp, 104
fld.d $fa4, $sp, 112
fld.d $fa5, $sp, 120
fld.d $fa6, $sp, 128
fld.d $fa7, $sp, 136

// Restore fp/ra, then reload the receiver from the slot whose address was
// passed to slowMsgLookup; this overrides the value restored from the spill
// area (presumably because the lookup may replace the receiver -- TODO confirm).
ld.d $fp, $sp, 144
ld.d $ra, $sp, 152
ld.d \receiver, $sp, 0

// Pop the save area and the 16-byte receiver slot.
addi.d $sp, $sp, ARGUMENT_SPILL_SIZE
addi.d $sp, $sp, 16

jr $t0 // Tail-call the method

// Label 5: small-object receiver. t0 still holds the SMALLOBJ_MASK bits; scale
// them by 8 to index the SmallObjectClasses pointer table (address loaded via
// the GOT), load the class into t0 and rejoin the normal lookup at label 0.
5:
pcalau12i $t1, %got_pc_hi20(CDECL(SmallObjectClasses))
ld.d $t1, $t1, %got_pc_lo12(CDECL(SmallObjectClasses))
slli.d $t0, $t0, 3
ldx.d $t0, $t1, $t0
b 0b
.cfi_endproc
.endm

// Exported entry points.
//
// objc_msgSend and objc_msgSend_fpret are two labels on the same code: the
// receiver is taken from a0 and the selector from a1.
// objc_msgSend_stret uses a second expansion of MSGSEND with the receiver in a1
// and the selector in a2, leaving a0 to the caller.
//
// Each MSGSEND expansion ends in an unconditional branch, so the first body
// never falls through into objc_msgSend_stret.
.globl CDECL(objc_msgSend_fpret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_fpret), %function)
.globl CDECL(objc_msgSend)
TYPE_DIRECTIVE(CDECL(objc_msgSend), %function)
.globl CDECL(objc_msgSend_stret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_stret), %function)
CDECL(objc_msgSend):
CDECL(objc_msgSend_fpret):
MSGSEND $a0, $a1
CDECL(objc_msgSend_stret):
MSGSEND $a1, $a2 // Pointer to stack frame in a0
Loading