-
Notifications
You must be signed in to change notification settings - Fork 132
Implement full support for the LoongArch CPU architecture #377
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
833a8e6
f8a018b
b25ac56
20ff36f
3346a9d
85b3f0b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -118,8 +118,8 @@ struct block_header | |
| void *block; | ||
| void(*fnptr)(void); | ||
| /* | ||
| * On 64-bit platforms, we have 16 bytes for instructions, which ought to | ||
| * be enough without padding. | ||
| * On most 64-bit platforms, we have 16 bytes for instructions, which ought | ||
| * to be enough without padding. | ||
| * Note: If we add too much padding, then we waste space but have no other | ||
| * ill effects. If we get this too small, then the assert in | ||
| * `init_trampolines` will fire on library load. | ||
|
|
@@ -129,10 +129,13 @@ struct block_header | |
| * PAGE_SIZE, so we need to pad block_header to 32 bytes. | ||
| * On PowerPC 64-bit where sizeof(void *) = 8 bytes, we | ||
| * add 16 bytes of padding. | ||
| * | ||
| * LoongArch64 needs five 4-byte instructions, so it also requires a | ||
|
basilisk-dev marked this conversation as resolved.
|
||
| * 32-byte block_header. | ||
| */ | ||
| #if defined(__i386__) || (defined(__mips__) && !defined(__mips_n64)) || (defined(__powerpc__) && !defined(__powerpc64__)) | ||
| uint64_t padding[3]; | ||
| #elif defined(__mips__) || defined(__ARM_ARCH_ISA_A64) || defined(__powerpc64__) | ||
| #elif defined(__mips__) || defined(__ARM_ARCH_ISA_A64) || defined(__powerpc64__) || (defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto above regarding arch macro. |
||
| uint64_t padding[2]; | ||
| #elif defined(__arm__) | ||
| uint64_t padding; | ||
|
|
@@ -195,6 +198,15 @@ extern char __objc_block_trampoline_16; | |
| extern char __objc_block_trampoline_end_16; | ||
| extern char __objc_block_trampoline_sret_16; | ||
| extern char __objc_block_trampoline_end_sret_16; | ||
| #elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto regarding arch macro. |
||
| extern char __objc_block_trampoline_16; | ||
| extern char __objc_block_trampoline_end_16; | ||
| extern char __objc_block_trampoline_sret_16; | ||
| extern char __objc_block_trampoline_end_sret_16; | ||
| extern char __objc_block_trampoline_64; | ||
| extern char __objc_block_trampoline_end_64; | ||
| extern char __objc_block_trampoline_sret_64; | ||
| extern char __objc_block_trampoline_end_sret_64; | ||
| #endif | ||
|
|
||
| // Cache the correct trampoline region | ||
|
|
@@ -223,12 +235,16 @@ PRIVATE void init_trampolines(void) | |
| // Check that sizeof(struct block_header) is a divisor of the current page size | ||
| assert(trampoline_header_per_page * sizeof(struct block_header) == trampoline_page_size); | ||
|
|
||
| // Check that assumptions for all non-variable page size implementations | ||
| // (currently everything except AArch64) are met | ||
| // Check that assumptions for all non-variable page size implementations | ||
| // (currently everything except AArch64 and LoongArch64) are met | ||
| #if defined(__powerpc64__) | ||
| assert(trampoline_page_size == 0x10000); | ||
| #elif defined(__ARM_ARCH_ISA_A64) | ||
| assert(trampoline_page_size == 0x1000 || trampoline_page_size == 0x4000); | ||
| #elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) | ||
| assert(trampoline_page_size == 0x1000 || | ||
| trampoline_page_size == 0x4000 || | ||
| trampoline_page_size == 0x10000); | ||
| #else | ||
| assert(trampoline_page_size == 0x1000); | ||
| #endif | ||
|
|
@@ -241,6 +257,18 @@ PRIVATE void init_trampolines(void) | |
| trampoline_start_sret = &__objc_block_trampoline_sret_16; | ||
| trampoline_end_sret = &__objc_block_trampoline_end_sret_16; | ||
| } else { | ||
| #elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
| if (trampoline_page_size == 0x4000) { | ||
| trampoline_start = &__objc_block_trampoline_16; | ||
| trampoline_end = &__objc_block_trampoline_end_16; | ||
| trampoline_start_sret = &__objc_block_trampoline_sret_16; | ||
| trampoline_end_sret = &__objc_block_trampoline_end_sret_16; | ||
| } else if (trampoline_page_size == 0x10000) { | ||
| trampoline_start = &__objc_block_trampoline_64; | ||
| trampoline_end = &__objc_block_trampoline_end_64; | ||
| trampoline_start_sret = &__objc_block_trampoline_sret_64; | ||
| trampoline_end_sret = &__objc_block_trampoline_end_sret_64; | ||
| } else { | ||
| #else | ||
| { | ||
| #endif | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -156,6 +156,37 @@ | |
| #define SARG0 ARG1 | ||
| #define SARG1 ARG2 | ||
|
|
||
| #elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
| //////////////////////////////////////////////////////////////////////////////// | ||
| // LoongArch64 trampoline | ||
| //////////////////////////////////////////////////////////////////////////////// | ||
| // Block trampoline body using a 4 KiB (0x1000) displacement. | ||
| // pcaddi adds its immediate shifted left by two to the PC, so with -1024 | ||
| // $t0 = PC - 4096, i.e. the address exactly 0x1000 bytes below this code. | ||
| // \arg0 is then copied into \arg1, \arg0 is reloaded from offset 0 of that | ||
| // address, the jump target is loaded from offset 8, and control is | ||
| // transferred to it with jr. | ||
| // NOTE(review): offsets 0 and 8 presumably correspond to the `block` and | ||
| // `fnptr` members of struct block_header - confirm against the C side. | ||
| .macro trampoline arg0, arg1 | ||
| pcaddi $t0, -1024 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The LoongArch ABI doesn’t do the MIPS trick of requiring a specific register for jalr so you know the PC on entry? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No it doesn't, the MIPS way is mostly a hack for lacking the instructions like pcaddi. |
||
| move \arg1, \arg0 | ||
| ld.d \arg0, $t0, 0 | ||
| ld.d $t0, $t0, 8 | ||
| jr $t0 | ||
| .endm | ||
| // Block trampoline body using a 16 KiB (0x4000) displacement. | ||
| // pcaddi adds its immediate shifted left by two to the PC, so with -4096 | ||
| // $t0 = PC - 16384. \arg0 is copied into \arg1, \arg0 is reloaded from | ||
| // offset 0 of that address, the jump target is loaded from offset 8, and | ||
| // control is transferred to it with jr. | ||
| .macro trampoline_16 arg0, arg1 | ||
| pcaddi $t0, -4096 | ||
| move \arg1, \arg0 | ||
| ld.d \arg0, $t0, 0 | ||
| ld.d $t0, $t0, 8 | ||
| jr $t0 | ||
| .endm | ||
| // Block trampoline body using a 64 KiB (0x10000) displacement. | ||
| // pcaddi adds its immediate shifted left by two to the PC, so with -16384 | ||
| // $t0 = PC - 65536. \arg0 is copied into \arg1, \arg0 is reloaded from | ||
| // offset 0 of that address, the jump target is loaded from offset 8, and | ||
| // control is transferred to it with jr. | ||
| .macro trampoline_64 arg0, arg1 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These trampolines appear to differ only by the displacement from the start, can’t that be just another macro argument? |
||
| pcaddi $t0, -16384 | ||
| move \arg1, \arg0 | ||
| ld.d \arg0, $t0, 0 | ||
| ld.d $t0, $t0, 8 | ||
| jr $t0 | ||
| .endm | ||
| #define ARG0 $a0 | ||
| #define ARG1 $a1 | ||
| #define ARG2 $a2 | ||
| #define SARG0 ARG1 | ||
| #define SARG1 ARG2 | ||
|
|
||
| #elif defined(__ARM_ARCH_ISA_A64) | ||
| //////////////////////////////////////////////////////////////////////////////// | ||
| // AArch64 (ARM64) trampoline | ||
|
|
@@ -234,7 +265,7 @@ CDECL(__objc_block_trampoline_sret): | |
| CDECL(__objc_block_trampoline_end_sret): | ||
|
|
||
| // Trampoline for 16 KiB page sizes | ||
| #if defined(__ARM_ARCH_ISA_A64) | ||
| #if defined(__ARM_ARCH_ISA_A64) || (defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float)) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
| .globl CDECL(__objc_block_trampoline_16) | ||
| CDECL(__objc_block_trampoline_16): | ||
| trampoline_16 ARG0, ARG1 | ||
|
|
@@ -247,6 +278,20 @@ CDECL(__objc_block_trampoline_sret_16): | |
| CDECL(__objc_block_trampoline_end_sret_16): | ||
| #endif | ||
|
|
||
| // Trampoline for 64 KiB page sizes | ||
| #if defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
| .globl CDECL(__objc_block_trampoline_64) | ||
| CDECL(__objc_block_trampoline_64): | ||
| trampoline_64 ARG0, ARG1 | ||
| .globl CDECL(__objc_block_trampoline_end_64) | ||
| CDECL(__objc_block_trampoline_end_64): | ||
| .globl CDECL(__objc_block_trampoline_sret_64) | ||
| CDECL(__objc_block_trampoline_sret_64): | ||
| trampoline_64 SARG0, SARG1 | ||
| .globl CDECL(__objc_block_trampoline_end_sret_64) | ||
| CDECL(__objc_block_trampoline_end_sret_64): | ||
| #endif | ||
|
|
||
| #ifdef __ELF__ | ||
| .section .note.GNU-stack,"",%progbits | ||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,8 @@ | |
| #if defined(__x86_64) || defined(__i386) || defined(__arm__) || \ | ||
| defined(__mips_n64) || defined(__mips_n32) || \ | ||
| defined(__ARM_ARCH_ISA_A64) || \ | ||
| (defined(__loongarch__) && defined(__loongarch_lp64) && \ | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| defined(__loongarch_double_float)) || \ | ||
| (defined(__riscv) && __riscv_xlen == 64 && \ | ||
| defined(__riscv_float_abi_double)) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,8 @@ | |
| #include "objc_msgSend.aarch64.S" | ||
| #elif defined(__riscv) && (__riscv_xlen == 64) && defined(__riscv_float_abi_double) | ||
| #include "objc_msgSend.riscv64.S" | ||
| #elif defined(__loongarch__) && defined(__loongarch_lp64) && defined(__loongarch_double_float) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
| #include "objc_msgSend.loongarch64.S" | ||
| #elif defined(__mips_n64) || defined(__mips_n32) | ||
| #include "objc_msgSend.mips.S" | ||
| #else | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| #define ARGUMENT_SPILL_SIZE (10*8 + 8*8) | ||
|
|
||
| // MSGSEND receiver, sel: message-send body shared by the entry points. | ||
| // \receiver and \sel name the registers that hold self and the selector. | ||
| // Fast path: load the class, walk its dtable with the selector index and | ||
| // tail-call the method stored in the slot that is found. | ||
| // Label 3: nil receiver, return zeroed registers. | ||
| // Label 4: empty slot, spill the argument registers, call slowMsgLookup and | ||
| // tail-call the IMP it returns. | ||
| // Label 5: receiver has SMALLOBJ_MASK bits set, take the class from | ||
| // SmallObjectClasses instead of from the object. | ||
| .macro MSGSEND receiver, sel | ||
| .cfi_startproc | ||
| beqz \receiver, 3f // Skip everything if receiver is nil | ||
|
|
||
| // Any SMALLOBJ_MASK bit set in the receiver selects the small-object path. | ||
| andi $t0, \receiver, SMALLOBJ_MASK | ||
| bnez $t0, 5f | ||
|
|
||
| ld.d $t0, \receiver, 0 // Load class into t0 | ||
| 0: | ||
| ld.d $t0, $t0, DTABLE_OFFSET // dtable -> t0 | ||
| ld.d $t1, \sel, 0 // selector->index -> t1 | ||
| ld.w $t2, $t0, SHIFT_OFFSET // dtable->shift -> t2 | ||
|
|
||
| // dtable->shift selects the lookup depth: 8 enters at label 1, 0 enters at | ||
| // label 2, any other value falls through and runs all three index steps. | ||
| addi.d $t3, $zero, 8 | ||
| beq $t2, $t3, 1f | ||
| beqz $t2, 2f | ||
|
|
||
| srli.d $t2, $t1, 13 // Extract byte 3 of sel index and multiply by 2^3 | ||
| andi $t2, $t2, 0x7f8 | ||
| add.d $t2, $t0, $t2 | ||
| ld.d $t0, $t2, DATA_OFFSET | ||
| 1: | ||
| srli.d $t2, $t1, 5 // Extract byte 2 of sel index and multiply by 2^3 | ||
| andi $t2, $t2, 0x7f8 | ||
| add.d $t2, $t0, $t2 | ||
| ld.d $t0, $t2, DATA_OFFSET | ||
| 2: | ||
| slli.d $t2, $t1, 3 // Multiply by 2^3 | ||
| andi $t2, $t2, 0x7f8 | ||
| add.d $t2, $t0, $t2 | ||
| ld.d $t0, $t2, DATA_OFFSET // Slot pointer is now in t0 | ||
|
|
||
| beqz $t0, 4f // If the slot is nil, go to the C path | ||
|
|
||
| ld.d $t0, $t0, SLOT_OFFSET // Load the method from the slot | ||
| jr $t0 // Tail-call the method | ||
|
|
||
| // Nil receiver: zero the \receiver and \sel registers and fa0/fa1, then | ||
| // return to the caller. | ||
| 3: | ||
| move \receiver, $zero | ||
| move \sel, $zero | ||
| movgr2fr.d $fa0, $zero | ||
| movgr2fr.d $fa1, $zero | ||
| ret | ||
|
|
||
| // Slow path. Frame layout relative to sp after the first adjustment: | ||
| // a0-a7 at 0..56, fa0-fa7 at 64..120, fp at 128, ra at 136. | ||
| 4: | ||
| addi.d $sp, $sp, -ARGUMENT_SPILL_SIZE | ||
|
|
||
| // Spill function arguments. | ||
| st.d $a0, $sp, 0 | ||
| st.d $a1, $sp, 8 | ||
| st.d $a2, $sp, 16 | ||
| st.d $a3, $sp, 24 | ||
| st.d $a4, $sp, 32 | ||
| st.d $a5, $sp, 40 | ||
| st.d $a6, $sp, 48 | ||
| st.d $a7, $sp, 56 | ||
|
|
||
| // Spill FP arguments. | ||
| fst.d $fa0, $sp, 64 | ||
| fst.d $fa1, $sp, 72 | ||
| fst.d $fa2, $sp, 80 | ||
| fst.d $fa3, $sp, 88 | ||
| fst.d $fa4, $sp, 96 | ||
| fst.d $fa5, $sp, 104 | ||
| fst.d $fa6, $sp, 112 | ||
| fst.d $fa7, $sp, 120 | ||
|
|
||
| st.d $fp, $sp, 128 | ||
| st.d $ra, $sp, 136 | ||
|
|
||
| // fp now points at the saved fp/ra pair; a further 16 bytes are reserved | ||
| // below the spill area to hold a copy of the receiver. | ||
| addi.d $fp, $sp, 128 | ||
| addi.d $sp, $sp, -16 | ||
| st.d \receiver, $sp, 0 // Keep &self at sp for slowMsgLookup | ||
|
|
||
| // Use explicit DWARF register numbers for compatibility with older Clang IAS. | ||
| // 22 is $fp and 1 is $ra: CFA = fp + 16, saved fp at CFA-16, saved ra at CFA-8. | ||
| .cfi_def_cfa 22, 16 | ||
| .cfi_offset 22, -16 | ||
| .cfi_offset 1, -8 | ||
|
|
||
| move $a0, $sp // &self in first argument | ||
| move $a1, \sel | ||
| pcaddu18i $ra, %call36(CDECL(slowMsgLookup)) | ||
| jirl $ra, $ra, 0 | ||
|
|
||
| move $t0, $a0 // IMP -> t0 | ||
|
|
||
| // sp is 16 bytes lower than when the registers were spilled, so every | ||
| // reload offset below is the corresponding spill offset plus 16. | ||
| ld.d $a0, $sp, 16 | ||
| ld.d $a1, $sp, 24 | ||
| ld.d $a2, $sp, 32 | ||
| ld.d $a3, $sp, 40 | ||
| ld.d $a4, $sp, 48 | ||
| ld.d $a5, $sp, 56 | ||
| ld.d $a6, $sp, 64 | ||
| ld.d $a7, $sp, 72 | ||
|
|
||
| fld.d $fa0, $sp, 80 | ||
| fld.d $fa1, $sp, 88 | ||
| fld.d $fa2, $sp, 96 | ||
| fld.d $fa3, $sp, 104 | ||
| fld.d $fa4, $sp, 112 | ||
| fld.d $fa5, $sp, 120 | ||
| fld.d $fa6, $sp, 128 | ||
| fld.d $fa7, $sp, 136 | ||
|
|
||
| ld.d $fp, $sp, 144 | ||
| ld.d $ra, $sp, 152 | ||
| // Reload the receiver last, from the slot whose address was passed to | ||
| // slowMsgLookup, so it overrides the value restored from the spill area. | ||
| // NOTE(review): presumably slowMsgLookup may replace *self - confirm. | ||
| ld.d \receiver, $sp, 0 | ||
|
|
||
| addi.d $sp, $sp, ARGUMENT_SPILL_SIZE | ||
| addi.d $sp, $sp, 16 | ||
|
|
||
| jr $t0 // Tail-call the method | ||
|
|
||
| // Small object: t0 still holds the masked receiver bits. Scale them by 8, | ||
| // index SmallObjectClasses (address loaded through the GOT) and rejoin the | ||
| // fast path at label 0 with the class in t0. | ||
| 5: | ||
| pcalau12i $t1, %got_pc_hi20(CDECL(SmallObjectClasses)) | ||
| ld.d $t1, $t1, %got_pc_lo12(CDECL(SmallObjectClasses)) | ||
| slli.d $t0, $t0, 3 | ||
| ldx.d $t0, $t1, $t0 | ||
| b 0b | ||
| .cfi_endproc | ||
| .endm | ||
|
|
||
| // Exported entry points. objc_msgSend and objc_msgSend_fpret are two labels | ||
| // on the same MSGSEND expansion (receiver in $a0, selector in $a1). | ||
| // objc_msgSend_stret gets its own expansion with the receiver in $a1 and the | ||
| // selector in $a2, leaving $a0 untouched by the register arguments. | ||
| .globl CDECL(objc_msgSend_fpret) | ||
| TYPE_DIRECTIVE(CDECL(objc_msgSend_fpret), %function) | ||
| .globl CDECL(objc_msgSend) | ||
| TYPE_DIRECTIVE(CDECL(objc_msgSend), %function) | ||
| .globl CDECL(objc_msgSend_stret) | ||
| TYPE_DIRECTIVE(CDECL(objc_msgSend_stret), %function) | ||
| CDECL(objc_msgSend): | ||
| CDECL(objc_msgSend_fpret): | ||
| MSGSEND $a0, $a1 | ||
| CDECL(objc_msgSend_stret): | ||
| MSGSEND $a1, $a2 // Pointer to stack frame in a0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Linker relaxation related? Might be good to clarify for the record.