From 833a8e66b97661ef6c6f604bfd0dae1ea7b59fe2 Mon Sep 17 00:00:00 2001 From: Basilisk-Dev Date: Wed, 25 Mar 2026 16:59:01 -0400 Subject: [PATCH 1/5] Implement objc_msgSend function for the LoongArch CPU architecture --- CMakeLists.txt | 2 +- objc/message.h | 2 + objc_msgSend.S | 2 + objc_msgSend.loongarch64.S | 134 +++++++++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 objc_msgSend.loongarch64.S diff --git a/CMakeLists.txt b/CMakeLists.txt index 887f0a9e..faa3d310 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -213,7 +213,7 @@ if (WIN32 AND NOT MINGW) COMMAND echo ${ASSEMBLER} ${ASM_TARGET} -c "${CMAKE_SOURCE_DIR}/objc_msgSend.S" -o "${CMAKE_BINARY_DIR}/objc_msgSend.obj" COMMAND ${ASSEMBLER} ${ASM_TARGET} -c "${CMAKE_SOURCE_DIR}/objc_msgSend.S" -o "${CMAKE_BINARY_DIR}/objc_msgSend.obj" MAIN_DEPENDENCY objc_msgSend.S - DEPENDS objc_msgSend.aarch64.S objc_msgSend.arm.S objc_msgSend.mips.S objc_msgSend.x86-32.S objc_msgSend.x86-64.S + DEPENDS objc_msgSend.aarch64.S objc_msgSend.arm.S objc_msgSend.loongarch64.S objc_msgSend.mips.S objc_msgSend.x86-32.S objc_msgSend.x86-64.S ) set(libobjc_ASM_OBJS block_trampolines.obj objc_msgSend.obj) endif() diff --git a/objc/message.h b/objc/message.h index ee4fba1d..7b107844 100644 --- a/objc/message.h +++ b/objc/message.h @@ -9,6 +9,8 @@ #if defined(__x86_64) || defined(__i386) || defined(__arm__) || \ defined(__mips_n64) || defined(__mips_n32) || \ defined(__ARM_ARCH_ISA_A64) || \ + (defined(__loongarch__) && defined(__loongarch_lp64) && \ + defined(__loongarch_double_float)) || \ (defined(__riscv) && __riscv_xlen == 64 && \ defined(__riscv_float_abi_double)) diff --git a/objc_msgSend.S b/objc_msgSend.S index 54275940..c90ceb9b 100644 --- a/objc_msgSend.S +++ b/objc_msgSend.S @@ -10,6 +10,8 @@ #include "objc_msgSend.aarch64.S" #elif defined(__riscv) && (__riscv_xlen == 64) && defined(__riscv_float_abi_double) #include "objc_msgSend.riscv64.S" +#elif defined(__loongarch__) 
&& defined(__loongarch_lp64) && defined(__loongarch_double_float) +#include "objc_msgSend.loongarch64.S" #elif defined(__mips_n64) || defined(__mips_n32) #include "objc_msgSend.mips.S" #else diff --git a/objc_msgSend.loongarch64.S b/objc_msgSend.loongarch64.S new file mode 100644 index 00000000..9b6b6c0b --- /dev/null +++ b/objc_msgSend.loongarch64.S @@ -0,0 +1,134 @@ +#define ARGUMENT_SPILL_SIZE (10*8 + 8*8) + +.macro MSGSEND receiver, sel + .cfi_startproc + beqz \receiver, 3f // Skip everything if receiver is nil + + andi $t0, \receiver, SMALLOBJ_MASK + bnez $t0, 5f + + ld.d $t0, \receiver, 0 // Load class into t0 +0: + ld.d $t0, $t0, DTABLE_OFFSET // dtable -> t0 + ld.d $t1, \sel, 0 // selector->index -> t1 + ld.w $t2, $t0, SHIFT_OFFSET // dtable->shift -> t2 + + addi.d $t3, $zero, 8 + beq $t2, $t3, 1f + beqz $t2, 2f + + srli.d $t2, $t1, 13 // Extract byte 3 of sel index and multiply by 2^3 + andi $t2, $t2, 0x7f8 + add.d $t2, $t0, $t2 + ld.d $t0, $t2, DATA_OFFSET +1: + srli.d $t2, $t1, 5 // Extract byte 2 of sel index and multiply by 2^3 + andi $t2, $t2, 0x7f8 + add.d $t2, $t0, $t2 + ld.d $t0, $t2, DATA_OFFSET +2: + slli.d $t2, $t1, 3 // Multiply by 2^3 + andi $t2, $t2, 0x7f8 + add.d $t2, $t0, $t2 + ld.d $t0, $t2, DATA_OFFSET // Slot pointer is now in t0 + + beqz $t0, 4f // If the slot is nil, go to the C path + + ld.d $t0, $t0, SLOT_OFFSET // Load the method from the slot + jr $t0 // Tail-call the method + +3: + move \receiver, $zero + move \sel, $zero + movgr2fr.d $fa0, $zero + movgr2fr.d $fa1, $zero + ret + +4: + addi.d $sp, $sp, -ARGUMENT_SPILL_SIZE + + // Spill function arguments. + st.d $a0, $sp, 0 + st.d $a1, $sp, 8 + st.d $a2, $sp, 16 + st.d $a3, $sp, 24 + st.d $a4, $sp, 32 + st.d $a5, $sp, 40 + st.d $a6, $sp, 48 + st.d $a7, $sp, 56 + + // Spill FP arguments. 
+ fst.d $fa0, $sp, 64 + fst.d $fa1, $sp, 72 + fst.d $fa2, $sp, 80 + fst.d $fa3, $sp, 88 + fst.d $fa4, $sp, 96 + fst.d $fa5, $sp, 104 + fst.d $fa6, $sp, 112 + fst.d $fa7, $sp, 120 + + st.d $fp, $sp, 128 + st.d $ra, $sp, 136 + + addi.d $fp, $sp, 128 + addi.d $sp, $sp, -16 + st.d \receiver, $sp, 0 // Keep &self at sp for slowMsgLookup + + .cfi_def_cfa fp, 16 + .cfi_offset fp, -16 + .cfi_offset ra, -8 + + move $a0, $sp // &self in first argument + move $a1, \sel + pcaddu18i $ra, %call36(CDECL(slowMsgLookup)) + jirl $ra, $ra, 0 + + move $t0, $a0 // IMP -> t0 + + ld.d $a0, $sp, 16 + ld.d $a1, $sp, 24 + ld.d $a2, $sp, 32 + ld.d $a3, $sp, 40 + ld.d $a4, $sp, 48 + ld.d $a5, $sp, 56 + ld.d $a6, $sp, 64 + ld.d $a7, $sp, 72 + + fld.d $fa0, $sp, 80 + fld.d $fa1, $sp, 88 + fld.d $fa2, $sp, 96 + fld.d $fa3, $sp, 104 + fld.d $fa4, $sp, 112 + fld.d $fa5, $sp, 120 + fld.d $fa6, $sp, 128 + fld.d $fa7, $sp, 136 + + ld.d $fp, $sp, 144 + ld.d $ra, $sp, 152 + ld.d \receiver, $sp, 0 + + addi.d $sp, $sp, ARGUMENT_SPILL_SIZE + addi.d $sp, $sp, 16 + + jr $t0 // Tail-call the method + +5: + pcalau12i $t1, %got_pc_hi20(CDECL(SmallObjectClasses)) + ld.d $t1, $t1, %got_pc_lo12(CDECL(SmallObjectClasses)) + slli.d $t0, $t0, 3 + ldx.d $t0, $t1, $t0 + b 0b + .cfi_endproc +.endm + +.globl CDECL(objc_msgSend_fpret) +TYPE_DIRECTIVE(CDECL(objc_msgSend_fpret), %function) +.globl CDECL(objc_msgSend) +TYPE_DIRECTIVE(CDECL(objc_msgSend), %function) +.globl CDECL(objc_msgSend_stret) +TYPE_DIRECTIVE(CDECL(objc_msgSend_stret), %function) +CDECL(objc_msgSend): +CDECL(objc_msgSend_fpret): + MSGSEND $a0, $a1 +CDECL(objc_msgSend_stret): + MSGSEND $a1, $a2 // Pointer to stack frame in a0 From f8a018ba96e9ea9678bb2e6fbf42ea7f880c8123 Mon Sep 17 00:00:00 2001 From: Basilisk-Dev Date: Thu, 26 Mar 2026 11:34:00 -0400 Subject: [PATCH 2/5] tweak objc_msgSend.loongarch64.S to compile on older clang/llvm versions --- objc_msgSend.loongarch64.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/objc_msgSend.loongarch64.S b/objc_msgSend.loongarch64.S index 9b6b6c0b..129771e0 100644 --- a/objc_msgSend.loongarch64.S +++ b/objc_msgSend.loongarch64.S @@ -74,9 +74,10 @@ addi.d $sp, $sp, -16 st.d \receiver, $sp, 0 // Keep &self at sp for slowMsgLookup - .cfi_def_cfa fp, 16 - .cfi_offset fp, -16 - .cfi_offset ra, -8 + // Use explicit DWARF register numbers for compatibility with older Clang IAS. + .cfi_def_cfa 22, 16 + .cfi_offset 22, -16 + .cfi_offset 1, -8 move $a0, $sp // &self in first argument move $a1, \sel From b25ac56563f8b1eb19e4dc6f737434da62fbc3dd Mon Sep 17 00:00:00 2001 From: Basilisk-Dev Date: Thu, 26 Mar 2026 11:35:57 -0400 Subject: [PATCH 3/5] Add CICD logic to build on loongarch64 --- .github/workflows/main.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 78929397..40f451b8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -100,6 +100,10 @@ jobs: system-processor: riscv64 triple: riscv64-linux-gnu rtld: ld-linux-riscv64-lp64d.so.1 + - name: loongarch64 + system-processor: loongarch64 + triple: loongarch64-linux-gnu + rtld: ld-linux-loongarch-lp64d.so.1 - name: ppc64el system-processor: powerpc64le triple: powerpc64le-linux-gnu @@ -118,7 +122,12 @@ jobs: - name: Install cross-compile toolchain and QEMU run: | sudo apt update - sudo apt install libstdc++-9-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build + if [ "${{ matrix.arch.name }}" = "loongarch64" ]; then + # No libstdc++-9 package exists for LoongArch on Ubuntu, so use version 13. 
+ sudo apt install libstdc++-13-dev-loong64-cross qemu-user ninja-build + else + sudo apt install libstdc++-9-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build + fi - name: Configure CMake run: | export LDFLAGS="-L/usr/lib/llvm-${{ matrix.llvm-version }}/lib/ -fuse-ld=lld-${{ matrix.llvm-version}} -Wl,--dynamic-linker=/usr/${{ matrix.arch.triple }}/lib/${{ matrix.arch.rtld }},-rpath,/usr/${{ matrix.arch.triple }}/lib" From 20ff36fc02447a2bdda7dcab4814e912e6f4e388 Mon Sep 17 00:00:00 2001 From: Basilisk-Dev Date: Thu, 26 Mar 2026 11:46:44 -0400 Subject: [PATCH 4/5] Do not run CICD for Clang 16 and 17 since the linker Ubuntu ships cannot link it --- .github/workflows/main.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 40f451b8..2b32239d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -109,6 +109,13 @@ jobs: triple: powerpc64le-linux-gnu rtld: ld64.so.2 exclude: + # lld versions prior to 18 do not support linking LoongArch in the Ubuntu provided packages. + - llvm-version: 16 + arch: + name: loongarch64 + - llvm-version: 17 + arch: + name: loongarch64 # FIXME(hugo): Hangs while executing tests. - llvm-version: 18 arch: From 3346a9d2ea3339b907f2605a5fce4a177fe5af25 Mon Sep 17 00:00:00 2001 From: Basilisk-Dev Date: Sat, 2 May 2026 18:46:15 -0400 Subject: [PATCH 5/5] Port block trampolines to loongarch64 --- block_to_imp.c | 38 +++++++++++++++++++++++++++++++----- block_trampolines.S | 47 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/block_to_imp.c b/block_to_imp.c index 7bba1467..06065aea 100644 --- a/block_to_imp.c +++ b/block_to_imp.c @@ -118,8 +118,8 @@ struct block_header void *block; void(*fnptr)(void); /* - * On 64-bit platforms, we have 16 bytes for instructions, which ought to - * be enough without padding. 
+ * On most 64-bit platforms, we have 16 bytes for instructions, which ought + * to be enough without padding. * Note: If we add too much padding, then we waste space but have no other * ill effects. If we get this too small, then the assert in * `init_trampolines` will fire on library load. @@ -129,10 +129,13 @@ struct block_header * PAGE_SIZE, so we need to pad block_header to 32 bytes. * On PowerPC 64-bit where sizeof(void *) = 8 bytes, we * add 16 bytes of padding. + * + * LoongArch64 needs five 4-byte instructions, so it also requires a + * 32-byte block_header. */ #if defined(__i386__) || (defined(__mips__) && !defined(__mips_n64)) || (defined(__powerpc__) && !defined(__powerpc64__)) uint64_t padding[3]; -#elif defined(__mips__) || defined(__ARM_ARCH_ISA_A64) || defined(__powerpc64__) +#elif defined(__mips__) || defined(__ARM_ARCH_ISA_A64) || defined(__powerpc64__) || (defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)) uint64_t padding[2]; #elif defined(__arm__) uint64_t padding; @@ -195,6 +198,15 @@ extern char __objc_block_trampoline_16; extern char __objc_block_trampoline_end_16; extern char __objc_block_trampoline_sret_16; extern char __objc_block_trampoline_end_sret_16; +#elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) +extern char __objc_block_trampoline_16; +extern char __objc_block_trampoline_end_16; +extern char __objc_block_trampoline_sret_16; +extern char __objc_block_trampoline_end_sret_16; +extern char __objc_block_trampoline_64; +extern char __objc_block_trampoline_end_64; +extern char __objc_block_trampoline_sret_64; +extern char __objc_block_trampoline_end_sret_64; #endif // Cache the correct trampoline region @@ -223,12 +235,16 @@ PRIVATE void init_trampolines(void) // Check that sizeof(struct block_header) is a divisor of the current page size assert(trampoline_header_per_page * sizeof(struct block_header) == trampoline_page_size); - // Check that 
assumptions for all non-variable page size implementations - // (currently everything except AArch64) are met + // Check that assumptions for all non-variable page size implementations + // (currently everything except AArch64 and LoongArch64) are met #if defined(__powerpc64__) assert(trampoline_page_size == 0x10000); #elif defined(__ARM_ARCH_ISA_A64) assert(trampoline_page_size == 0x1000 || trampoline_page_size == 0x4000); +#elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) + assert(trampoline_page_size == 0x1000 || + trampoline_page_size == 0x4000 || + trampoline_page_size == 0x10000); #else assert(trampoline_page_size == 0x1000); #endif @@ -241,6 +257,18 @@ PRIVATE void init_trampolines(void) trampoline_start_sret = &__objc_block_trampoline_sret_16; trampoline_end_sret = &__objc_block_trampoline_end_sret_16; } else { +#elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) + if (trampoline_page_size == 0x4000) { + trampoline_start = &__objc_block_trampoline_16; + trampoline_end = &__objc_block_trampoline_end_16; + trampoline_start_sret = &__objc_block_trampoline_sret_16; + trampoline_end_sret = &__objc_block_trampoline_end_sret_16; + } else if (trampoline_page_size == 0x10000) { + trampoline_start = &__objc_block_trampoline_64; + trampoline_end = &__objc_block_trampoline_end_64; + trampoline_start_sret = &__objc_block_trampoline_sret_64; + trampoline_end_sret = &__objc_block_trampoline_end_sret_64; + } else { #else { #endif diff --git a/block_trampolines.S b/block_trampolines.S index 2f9f11b0..d04d7786 100644 --- a/block_trampolines.S +++ b/block_trampolines.S @@ -156,6 +156,37 @@ #define SARG0 ARG1 #define SARG1 ARG2 +#elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) +//////////////////////////////////////////////////////////////////////////////// +// LoongArch64 trampoline 
+//////////////////////////////////////////////////////////////////////////////// +.macro trampoline arg0, arg1 + pcaddi $t0, -1024 + move \arg1, \arg0 + ld.d \arg0, $t0, 0 + ld.d $t0, $t0, 8 + jr $t0 +.endm +.macro trampoline_16 arg0, arg1 + pcaddi $t0, -4096 + move \arg1, \arg0 + ld.d \arg0, $t0, 0 + ld.d $t0, $t0, 8 + jr $t0 +.endm +.macro trampoline_64 arg0, arg1 + pcaddi $t0, -16384 + move \arg1, \arg0 + ld.d \arg0, $t0, 0 + ld.d $t0, $t0, 8 + jr $t0 +.endm +#define ARG0 $a0 +#define ARG1 $a1 +#define ARG2 $a2 +#define SARG0 ARG1 +#define SARG1 ARG2 + #elif defined(__ARM_ARCH_ISA_A64) //////////////////////////////////////////////////////////////////////////////// // AArch64 (ARM64) trampoline @@ -234,7 +265,7 @@ CDECL(__objc_block_trampoline_sret): CDECL(__objc_block_trampoline_end_sret): // Trampoline for 16 KiB page sizes -#if defined(__ARM_ARCH_ISA_A64) +#if defined(__ARM_ARCH_ISA_A64) || (defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)) .globl CDECL(__objc_block_trampoline_16) CDECL(__objc_block_trampoline_16): trampoline_16 ARG0, ARG1 @@ -247,6 +278,20 @@ CDECL(__objc_block_trampoline_sret_16): CDECL(__objc_block_trampoline_end_sret_16): #endif +// Trampoline for 64 KiB page sizes +#if defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) +.globl CDECL(__objc_block_trampoline_64) +CDECL(__objc_block_trampoline_64): + trampoline_64 ARG0, ARG1 +.globl CDECL(__objc_block_trampoline_end_64) +CDECL(__objc_block_trampoline_end_64): +.globl CDECL(__objc_block_trampoline_sret_64) +CDECL(__objc_block_trampoline_sret_64): + trampoline_64 SARG0, SARG1 +.globl CDECL(__objc_block_trampoline_end_sret_64) +CDECL(__objc_block_trampoline_end_sret_64): +#endif + #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif