b2d10977a8
This reverts commit a03afef7f2e8ae363a97357ec75ffbfef372a9ea. Signed-off-by: Felix Fietkau <nbd@nbd.name>
372 lines
9.2 KiB
Diff
372 lines
9.2 KiB
Diff
From: Felix Fietkau <nbd@nbd.name>
|
|
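Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules. To achieve this, try to load kernel modules to KSEG0 and, if that doesn't work, use vmalloc and fix up relocations with a jump table.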
|
|
|
|
lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
|
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
|
---
|
|
arch/mips/Makefile | 5 +
|
|
arch/mips/include/asm/module.h | 5 +
|
|
arch/mips/kernel/module.c | 279 ++++++++++++++++++++++++++++++++++++++++-
|
|
3 files changed, 284 insertions(+), 5 deletions(-)
|
|
|
|
--- a/arch/mips/Makefile
|
|
+++ b/arch/mips/Makefile
|
|
@@ -93,8 +93,18 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
|
|
cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
|
|
cflags-y += -msoft-float
|
|
LDFLAGS_vmlinux += -G 0 -static -n -nostdlib
|
|
+ifdef CONFIG_64BIT
|
|
KBUILD_AFLAGS_MODULE += -mlong-calls
|
|
KBUILD_CFLAGS_MODULE += -mlong-calls
|
|
+else
|
|
+ ifdef CONFIG_DYNAMIC_FTRACE
|
|
+ KBUILD_AFLAGS_MODULE += -mlong-calls
|
|
+ KBUILD_CFLAGS_MODULE += -mlong-calls
|
|
+ else
|
|
+ KBUILD_AFLAGS_MODULE += -mno-long-calls
|
|
+ KBUILD_CFLAGS_MODULE += -mno-long-calls
|
|
+ endif
|
|
+endif
|
|
|
|
ifeq ($(CONFIG_RELOCATABLE),y)
|
|
LDFLAGS_vmlinux += --emit-relocs
|
|
--- a/arch/mips/include/asm/module.h
|
|
+++ b/arch/mips/include/asm/module.h
|
|
@@ -12,6 +12,11 @@ struct mod_arch_specific {
|
|
const struct exception_table_entry *dbe_start;
|
|
const struct exception_table_entry *dbe_end;
|
|
struct mips_hi16 *r_mips_hi16_list;
|
|
+
|
|
+ void *phys_plt_tbl;
|
|
+ void *virt_plt_tbl;
|
|
+ unsigned int phys_plt_offset;
|
|
+ unsigned int virt_plt_offset;
|
|
};
|
|
|
|
typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */
|
|
--- a/arch/mips/kernel/module.c
|
|
+++ b/arch/mips/kernel/module.c
|
|
@@ -44,14 +44,221 @@ struct mips_hi16 {
|
|
static LIST_HEAD(dbe_list);
|
|
static DEFINE_SPINLOCK(dbe_lock);
|
|
|
|
-#ifdef MODULE_START
|
|
+/*
|
|
+ * Get the maximum trampoline space that may be needed by either the init
|
|
+ * or the non-init sections. Only used if we cannot find enough contiguous
|
|
+ * physically mapped memory to put the module into.
|
|
+ */
|
|
+static unsigned int
|
|
+get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
|
|
+ const char *secstrings, unsigned int symindex, bool is_init)
|
|
+{
|
|
+ unsigned long ret = 0;
|
|
+ unsigned int i, j;
|
|
+ Elf_Sym *syms;
|
|
+
|
|
+ /* Everything marked ALLOC (this includes the exported symbols) */
|
|
+ for (i = 1; i < hdr->e_shnum; ++i) {
|
|
+ unsigned int info = sechdrs[i].sh_info;
|
|
+
|
|
+ if (sechdrs[i].sh_type != SHT_REL
|
|
+ && sechdrs[i].sh_type != SHT_RELA)
|
|
+ continue;
|
|
+
|
|
+ /* Not a valid relocation section? */
|
|
+ if (info >= hdr->e_shnum)
|
|
+ continue;
|
|
+
|
|
+ /* Don't bother with non-allocated sections */
|
|
+ if (!(sechdrs[info].sh_flags & SHF_ALLOC))
|
|
+ continue;
|
|
+
|
|
+ /* If it's called *.init*, and we're not init, we're
|
|
+ not interested */
|
|
+ if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
|
|
+ != is_init)
|
|
+ continue;
|
|
+
|
|
+ syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
|
|
+ if (sechdrs[i].sh_type == SHT_REL) {
|
|
+ Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
|
|
+ unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
|
|
+
|
|
+ for (j = 0; j < size; ++j) {
|
|
+ Elf_Sym *sym;
|
|
+
|
|
+ if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
|
|
+ continue;
|
|
+
|
|
+ sym = syms + ELF_MIPS_R_SYM(rel[j]);
|
|
+ if (!is_init && sym->st_shndx != SHN_UNDEF)
|
|
+ continue;
|
|
+
|
|
+ ret += 4 * sizeof(int);
|
|
+ }
|
|
+ } else {
|
|
+ Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
|
|
+ unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
|
|
+
|
|
+ for (j = 0; j < size; ++j) {
|
|
+ Elf_Sym *sym;
|
|
+
|
|
+ if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
|
|
+ continue;
|
|
+
|
|
+ sym = syms + ELF_MIPS_R_SYM(rela[j]);
|
|
+ if (!is_init && sym->st_shndx != SHN_UNDEF)
|
|
+ continue;
|
|
+
|
|
+ ret += 4 * sizeof(int);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+#ifndef MODULE_START
|
|
+static void *alloc_phys(unsigned long size)
|
|
+{
|
|
+ unsigned order;
|
|
+ struct page *page;
|
|
+ struct page *p;
|
|
+
|
|
+ size = PAGE_ALIGN(size);
|
|
+ order = get_order(size);
|
|
+
|
|
+ page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
|
|
+ __GFP_THISNODE, order);
|
|
+ if (!page)
|
|
+ return NULL;
|
|
+
|
|
+ split_page(page, order);
|
|
+
|
|
+ /* mark all pages except for the last one */
|
|
+ for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
|
|
+ set_bit(PG_owner_priv_1, &p->flags);
|
|
+
|
|
+ for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
|
|
+ __free_page(p);
|
|
+
|
|
+ return page_address(page);
|
|
+}
|
|
+#endif
|
|
+
|
|
+static void free_phys(void *ptr)
|
|
+{
|
|
+ struct page *page;
|
|
+ bool free;
|
|
+
|
|
+ page = virt_to_page(ptr);
|
|
+ do {
|
|
+ free = test_and_clear_bit(PG_owner_priv_1, &page->flags);
|
|
+ __free_page(page);
|
|
+ page++;
|
|
+ } while (free);
|
|
+}
|
|
+
|
|
+
|
|
void *module_alloc(unsigned long size)
|
|
{
|
|
+#ifdef MODULE_START
|
|
return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
|
|
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
|
|
__builtin_return_address(0));
|
|
+#else
|
|
+ void *ptr;
|
|
+
|
|
+ if (size == 0)
|
|
+ return NULL;
|
|
+
|
|
+ ptr = alloc_phys(size);
|
|
+
|
|
+ /* If we failed to allocate physically contiguous memory,
|
|
+ * fall back to regular vmalloc. The module loader code will
|
|
+ * create jump tables to handle long jumps */
|
|
+ if (!ptr)
|
|
+ return vmalloc(size);
|
|
+
|
|
+ return ptr;
|
|
+#endif
|
|
}
|
|
+
|
|
+static inline bool is_phys_addr(void *ptr)
|
|
+{
|
|
+#ifdef CONFIG_64BIT
|
|
+ return (KSEGX((unsigned long)ptr) == CKSEG0);
|
|
+#else
|
|
+ return (KSEGX(ptr) == KSEG0);
|
|
#endif
|
|
+}
|
|
+
|
|
+/* Free memory returned from module_alloc */
|
|
+void module_memfree(void *module_region)
|
|
+{
|
|
+ if (is_phys_addr(module_region))
|
|
+ free_phys(module_region);
|
|
+ else
|
|
+ vfree(module_region);
|
|
+}
|
|
+
|
|
+static void *__module_alloc(int size, bool phys)
|
|
+{
|
|
+ void *ptr;
|
|
+
|
|
+ if (phys)
|
|
+ ptr = kmalloc(size, GFP_KERNEL);
|
|
+ else
|
|
+ ptr = vmalloc(size);
|
|
+ return ptr;
|
|
+}
|
|
+
|
|
+static void __module_free(void *ptr)
|
|
+{
|
|
+ if (is_phys_addr(ptr))
|
|
+ kfree(ptr);
|
|
+ else
|
|
+ vfree(ptr);
|
|
+}
|
|
+
|
|
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
|
|
+ char *secstrings, struct module *mod)
|
|
+{
|
|
+ unsigned int symindex = 0;
|
|
+ unsigned int core_size, init_size;
|
|
+ int i;
|
|
+
|
|
+ mod->arch.phys_plt_offset = 0;
|
|
+ mod->arch.virt_plt_offset = 0;
|
|
+ mod->arch.phys_plt_tbl = NULL;
|
|
+ mod->arch.virt_plt_tbl = NULL;
|
|
+
|
|
+ if (IS_ENABLED(CONFIG_64BIT))
|
|
+ return 0;
|
|
+
|
|
+ for (i = 1; i < hdr->e_shnum; i++)
|
|
+ if (sechdrs[i].sh_type == SHT_SYMTAB)
|
|
+ symindex = i;
|
|
+
|
|
+ core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
|
|
+ init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
|
|
+
|
|
+ if ((core_size + init_size) == 0)
|
|
+ return 0;
|
|
+
|
|
+ mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1);
|
|
+ if (!mod->arch.phys_plt_tbl)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
|
|
+ if (!mod->arch.virt_plt_tbl) {
|
|
+ __module_free(mod->arch.phys_plt_tbl);
|
|
+ mod->arch.phys_plt_tbl = NULL;
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
|
|
static int apply_r_mips_none(struct module *me, u32 *location,
|
|
u32 base, Elf_Addr v, bool rela)
|
|
@@ -67,9 +274,40 @@ static int apply_r_mips_32(struct module
|
|
return 0;
|
|
}
|
|
|
|
+static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
|
|
+ void *start, Elf_Addr v)
|
|
+{
|
|
+ unsigned *tramp = start + *plt_offset;
|
|
+ *plt_offset += 4 * sizeof(int);
|
|
+
|
|
+ /* adjust carry for addiu */
|
|
+ if (v & 0x00008000)
|
|
+ v += 0x10000;
|
|
+
|
|
+ tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */
|
|
+ tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */
|
|
+ tramp[2] = 0x03200008; /* jr t9 */
|
|
+ tramp[3] = 0x00000000; /* nop */
|
|
+
|
|
+ return (Elf_Addr) tramp;
|
|
+}
|
|
+
|
|
+static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
|
|
+{
|
|
+ if (is_phys_addr(location))
|
|
+ return add_plt_entry_to(&me->arch.phys_plt_offset,
|
|
+ me->arch.phys_plt_tbl, v);
|
|
+ else
|
|
+ return add_plt_entry_to(&me->arch.virt_plt_offset,
|
|
+ me->arch.virt_plt_tbl, v);
|
|
+
|
|
+}
|
|
+
|
|
static int apply_r_mips_26(struct module *me, u32 *location,
|
|
u32 base, Elf_Addr v, bool rela)
|
|
{
|
|
+ u32 ofs = base & 0x03ffffff;
|
|
+
|
|
if (v % 4) {
|
|
pr_err("module %s: dangerous R_MIPS_26 relocation\n",
|
|
me->name);
|
|
@@ -77,13 +315,17 @@ static int apply_r_mips_26(struct module
|
|
}
|
|
|
|
if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
|
|
- pr_err("module %s: relocation overflow\n",
|
|
- me->name);
|
|
- return -ENOEXEC;
|
|
+ v = add_plt_entry(me, location, v + (ofs << 2));
|
|
+ if (!v) {
|
|
+ pr_err("module %s: relocation overflow\n",
|
|
+ me->name);
|
|
+ return -ENOEXEC;
|
|
+ }
|
|
+ ofs = 0;
|
|
}
|
|
|
|
*location = (*location & ~0x03ffffff) |
|
|
- ((base + (v >> 2)) & 0x03ffffff);
|
|
+ ((ofs + (v >> 2)) & 0x03ffffff);
|
|
|
|
return 0;
|
|
}
|
|
@@ -459,9 +701,36 @@ int module_finalize(const Elf_Ehdr *hdr,
|
|
list_add(&me->arch.dbe_list, &dbe_list);
|
|
spin_unlock_irq(&dbe_lock);
|
|
}
|
|
+
|
|
+ /* Get rid of the fixup trampoline if we're running the module
|
|
+ * from physically mapped address space */
|
|
+ if (me->arch.phys_plt_offset == 0) {
|
|
+ __module_free(me->arch.phys_plt_tbl);
|
|
+ me->arch.phys_plt_tbl = NULL;
|
|
+ }
|
|
+ if (me->arch.virt_plt_offset == 0) {
|
|
+ __module_free(me->arch.virt_plt_tbl);
|
|
+ me->arch.virt_plt_tbl = NULL;
|
|
+ }
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
+void module_arch_freeing_init(struct module *mod)
|
|
+{
|
|
+ if (mod->state == MODULE_STATE_LIVE)
|
|
+ return;
|
|
+
|
|
+ if (mod->arch.phys_plt_tbl) {
|
|
+ __module_free(mod->arch.phys_plt_tbl);
|
|
+ mod->arch.phys_plt_tbl = NULL;
|
|
+ }
|
|
+ if (mod->arch.virt_plt_tbl) {
|
|
+ __module_free(mod->arch.virt_plt_tbl);
|
|
+ mod->arch.virt_plt_tbl = NULL;
|
|
+ }
|
|
+}
|
|
+
|
|
void module_arch_cleanup(struct module *mod)
|
|
{
|
|
spin_lock_irq(&dbe_lock);
|