diff -Naur linux-2.6.19.2.orig/Documentation/dontdiff linux-2.6.19.2/Documentation/dontdiff --- linux-2.6.19.2.orig/Documentation/dontdiff 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/Documentation/dontdiff 2007-01-12 23:27:23.000000000 +0000 @@ -55,7 +55,7 @@ aicasm aicdb.h* asm -asm-offsets.* +asm-offsets.h asm_offsets.* autoconf.h* bbootsect @@ -143,4 +143,5 @@ vsyscall.lds wanxlfw.inc uImage +utsrelease.h zImage diff -Naur linux-2.6.19.2.orig/Makefile linux-2.6.19.2/Makefile --- linux-2.6.19.2.orig/Makefile 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/Makefile 2007-01-12 23:27:23.000000000 +0000 @@ -313,7 +313,7 @@ CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) -CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ +CFLAGS := -Wall -W -Wno-unused -Wno-sign-compare -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common AFLAGS := -D__ASSEMBLY__ @@ -559,7 +559,7 @@ ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff -Naur linux-2.6.19.2.orig/arch/alpha/kernel/module.c linux-2.6.19.2/arch/alpha/kernel/module.c --- linux-2.6.19.2.orig/arch/alpha/kernel/module.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/alpha/kernel/module.c 2007-01-12 23:27:20.000000000 +0000 @@ -177,7 +177,7 @@ /* The small sections were sorted to the end of the segment. The following should definitely cover them. 
*/ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->module_core_rw + me->core_size_rw - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff -Naur linux-2.6.19.2.orig/arch/alpha/kernel/osf_sys.c linux-2.6.19.2/arch/alpha/kernel/osf_sys.c --- linux-2.6.19.2.orig/arch/alpha/kernel/osf_sys.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/alpha/kernel/osf_sys.c 2007-01-12 23:27:20.000000000 +0000 @@ -1277,6 +1277,10 @@ merely specific addresses, but regions of memory -- perhaps this feature should be incorporated into all ports? */ +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != (unsigned long) -ENOMEM) @@ -1284,8 +1288,8 @@ } /* Next, try allocating at TASK_UNMAPPED_BASE. */ - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), - len, limit); + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(current->mm->mmap_base), len, limit); + if (addr != (unsigned long) -ENOMEM) return addr; diff -Naur linux-2.6.19.2.orig/arch/alpha/kernel/ptrace.c linux-2.6.19.2/arch/alpha/kernel/ptrace.c --- linux-2.6.19.2.orig/arch/alpha/kernel/ptrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/alpha/kernel/ptrace.c 2007-01-12 23:27:20.000000000 +0000 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -283,6 +284,9 @@ goto out_notsk; } + if (gr_handle_ptrace(child, request)) + goto out; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -Naur linux-2.6.19.2.orig/arch/alpha/mm/fault.c linux-2.6.19.2/arch/alpha/mm/fault.c --- linux-2.6.19.2.orig/arch/alpha/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/alpha/mm/fault.c 2007-01-12 23:27:20.000000000 +0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,124 @@ __reload_thread(pcb); } +#ifdef CONFIG_PAX_PAGEEXEC 
+/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed (the only possible result without CONFIG_PAX_EMUPLT) + * 2 when patched PLT trampoline was detected (regs->r27 and regs->pc have been updated) + * 3 when unpatched PLT trampoline was detected (regs->r28, regs->r27 and regs->pc have been updated) + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; /* OR of the get_user() results, non-zero once any read has failed */ + + do { /* PaX: patched PLT emulation #1 */ + unsigned int ldah, ldq, jmp; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(ldq, (unsigned int *)(regs->pc+4)); + err |= get_user(jmp, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (ldq & 0xFFFF0000U) == 0xA77B0000U && + jmp == 0x6BFB0000U) /* first two words are matched on their upper 16 bits only, the third must match exactly */ + { + unsigned long r27, addr; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL; + + addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); /* xor-then-add sign-extends: addrh from bit 31, addrl from bit 15 */ + err = get_user(r27, (unsigned long*)addr); /* load the new r27 from user memory at addr */ + if (err) + break; + + regs->r27 = r27; + regs->pc = r27; /* pc is set to the same loaded value */ + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ldah, lda, br; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(lda, (unsigned int *)(regs->pc+4)); + err |= get_user(br, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (lda & 0xFFFF0000U) == 0xA77B0000U && /* NOTE(review): same constant as the ldq test in emulation #1 although this word is named lda - confirm the intended opcode */ + (br & 0xFFE00000U) == 0xC3E00000U) + { + unsigned long addr = br | 0xFFFFFFFFFFE00000UL; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; + + regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); /* unlike emulation #1 nothing is loaded from memory: the sign-extended sum is added to r27 directly */ + regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); /* 12 skips the three words read above; the low 21 bits of br are sign-extended and multiplied by 4 */ + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation */ + unsigned int br; + + err = get_user(br, (unsigned int *)regs->pc); + + if (!err && (br & 0xFFE00000U) == 0xC3800000U) { + unsigned int br2, ldq, nop, jmp; 
+ unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; + + addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); /* pc + 4 plus the sign-extended low 21 bits of br times 4 */ + err = get_user(br2, (unsigned int *)addr); + err |= get_user(ldq, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + err |= get_user(jmp, (unsigned int *)(addr+12)); + err |= get_user(resolver, (unsigned long *)(addr+16)); /* an unsigned long stored right after the four words */ + + if (err) + break; + + if (br2 == 0xC3600000U && + ldq == 0xA77B000CU && + nop == 0x47FF041FU && + jmp == 0x6B7B0000U) /* all four words must match exactly */ + { + regs->r28 = regs->pc+4; + regs->r27 = addr+16; + regs->pc = resolver; + return 3; + } + } + } while (0); +#endif + + return 1; /* no pattern matched, reads failed, or emulation is compiled out */ +} + +void pax_report_insns(void *pc, void *sp) +{ /* logs five 32-bit words starting at pc, printing question marks for any word get_user cannot read; sp is not used here */ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif /* * This routine handles page faults. It determines the address, @@ -132,8 +251,29 @@ good_area: si_code = SEGV_ACCERR; if (cause < 0) { - if (!(vma->vm_flags & VM_EXEC)) + if (!(vma->vm_flags & VM_EXEC)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc) + goto bad_area; + + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->pc, (void*)rdusp()); + do_exit(SIGKILL); +#else goto bad_area; +#endif + + } } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) diff -Naur linux-2.6.19.2.orig/arch/arm/mm/mmap.c linux-2.6.19.2/arch/arm/mm/mmap.c --- linux-2.6.19.2.orig/arch/arm/mm/mmap.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/arm/mm/mmap.c 2007-01-12 23:27:21.000000000 +0000 @@ -61,6 +61,10 @@ if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if 
(addr) { if (do_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -75,7 +79,7 @@ if (len > mm->cached_hole_size) { start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -92,8 +96,8 @@ * Start a new search - just in case we missed * some holes. */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } diff -Naur linux-2.6.19.2.orig/arch/i386/Kconfig linux-2.6.19.2/arch/i386/Kconfig --- linux-2.6.19.2.orig/arch/i386/Kconfig 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/Kconfig 2007-01-12 23:27:21.000000000 +0000 @@ -803,7 +803,7 @@ config COMPAT_VDSO bool "Compat VDSO support" - default y + default n depends on !PARAVIRT help Map the VDSO to the predictable old-style address too. @@ -999,7 +999,7 @@ choice prompt "PCI access mode" depends on PCI && !X86_VISWS - default PCI_GOANY + default PCI_GODIRECT ---help--- On PCI systems, the BIOS can be used to detect the PCI devices and determine their configuration. 
However, some old PCI motherboards @@ -1031,7 +1031,7 @@ config PCI_BIOS bool - depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) + depends on !X86_VISWS && PCI && PCI_GOBIOS default y config PCI_DIRECT diff -Naur linux-2.6.19.2.orig/arch/i386/Kconfig.cpu linux-2.6.19.2/arch/i386/Kconfig.cpu --- linux-2.6.19.2.orig/arch/i386/Kconfig.cpu 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/Kconfig.cpu 2007-01-12 23:27:21.000000000 +0000 @@ -252,7 +252,7 @@ config X86_F00F_BUG bool - depends on M586MMX || M586TSC || M586 || M486 || M386 + depends on (M586MMX || M586TSC || M586 || M486 || M386) && !PAX_KERNEXEC default y config X86_WP_WORKS_OK @@ -282,7 +282,7 @@ config X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK8 || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 default y config X86_GOOD_APIC diff -Naur linux-2.6.19.2.orig/arch/i386/Kconfig.debug linux-2.6.19.2/arch/i386/Kconfig.debug --- linux-2.6.19.2.orig/arch/i386/Kconfig.debug 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/Kconfig.debug 2007-01-12 23:27:21.000000000 +0000 @@ -48,7 +48,7 @@ config DEBUG_RODATA bool "Write protect kernel read-only data structures" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !PAX_KERNEXEC help Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const diff -Naur linux-2.6.19.2.orig/arch/i386/boot/compressed/head.S linux-2.6.19.2/arch/i386/boot/compressed/head.S --- linux-2.6.19.2.orig/arch/i386/boot/compressed/head.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/boot/compressed/head.S 2007-01-12 23:27:21.000000000 +0000 @@ -39,11 +39,13 @@ movl %eax,%gs lss 
stack_start,%esp + movl 0x000000,%ecx xorl %eax,%eax 1: incl %eax # check that A20 really IS enabled movl %eax,0x000000 # loop forever if it isn't cmpl %eax,0x100000 je 1b + movl %ecx,0x000000 /* * Initialize eflags. Some BIOS's leave bits like NT set. This would diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/alternative.c linux-2.6.19.2/arch/i386/kernel/alternative.c --- linux-2.6.19.2.orig/arch/i386/kernel/alternative.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/alternative.c 2007-01-12 23:27:21.000000000 +0000 @@ -3,6 +3,7 @@ #include #include #include +#include static int no_replacement = 0; static int smp_alt_once = 0; @@ -142,6 +143,12 @@ u8 *instr; int diff, i, k; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { BUG_ON(a->replacementlen > a->instrlen); @@ -156,16 +163,21 @@ __FUNCTION__, a->instr, instr); } #endif - memcpy(instr, a->replacement, a->replacementlen); + memcpy(instr + __KERNEL_TEXT_OFFSET, a->replacement, a->replacementlen); diff = a->instrlen - a->replacementlen; /* Pad the rest with nops */ for (i = a->replacementlen; diff > 0; diff -= k, i += k) { k = diff; if (k > ASM_NOP_MAX) k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); + memcpy(a->instr + i + __KERNEL_TEXT_OFFSET, noptable[k], k); } } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } #ifdef CONFIG_SMP @@ -174,50 +186,96 @@ { struct alt_instr *a; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { memcpy(a->replacement + a->replacementlen, - a->instr, + a->instr + __KERNEL_TEXT_OFFSET, a->instrlen); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) { struct alt_instr *a; +#ifdef 
CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + for (a = start; a < end; a++) { - memcpy(a->instr, + memcpy(a->instr + __KERNEL_TEXT_OFFSET, a->replacement + a->replacementlen, a->instrlen); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) { - u8 **ptr; + u8 *ptr; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) + pax_open_kernel(cr0); +#endif + + for (; start < end; start++) { + ptr = *start + __KERNEL_TEXT_OFFSET; + if (ptr < text) continue; - if (*ptr > text_end) + if (ptr > text_end) continue; - **ptr = 0xf0; /* lock prefix */ + *ptr = 0xf0; /* lock prefix */ }; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { unsigned char **noptable = find_nop_table(); - u8 **ptr; + u8 *ptr; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) + for (; start < end; start++) { + ptr = *start + __KERNEL_TEXT_OFFSET; + if (ptr < text) continue; - if (*ptr > text_end) + if (ptr > text_end) continue; - **ptr = noptable[1][0]; + *ptr = noptable[1][0]; }; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } struct smp_alt_module { diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/apic.c linux-2.6.19.2/arch/i386/kernel/apic.c --- linux-2.6.19.2.orig/arch/i386/kernel/apic.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/apic.c 2007-01-12 23:27:21.000000000 +0000 @@ -1197,7 +1197,7 @@ { profile_tick(CPU_PROFILING); #ifdef CONFIG_SMP - update_process_times(user_mode_vm(get_irq_regs())); + update_process_times(user_mode(get_irq_regs())); #endif /* diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/apm.c linux-2.6.19.2/arch/i386/kernel/apm.c --- 
linux-2.6.19.2.orig/arch/i386/kernel/apm.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/apm.c 2007-01-12 23:27:21.000000000 +0000 @@ -235,7 +235,7 @@ #include "io_ports.h" extern unsigned long get_cmos_time(void); -extern void machine_real_restart(unsigned char *, int); +extern void machine_real_restart(const unsigned char *, unsigned int); #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -608,9 +608,18 @@ struct desc_struct save_desc_40; struct desc_struct *gdt; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + cpus = apm_save_cpus(); cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; @@ -621,6 +630,11 @@ APM_DO_RESTORE_SEGS; apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); apm_restore_cpus(cpus); @@ -651,9 +665,18 @@ struct desc_struct save_desc_40; struct desc_struct *gdt; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + cpus = apm_save_cpus(); cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; @@ -664,6 +687,11 @@ APM_DO_RESTORE_SEGS; apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); apm_restore_cpus(cpus); return error; @@ -931,7 +959,7 @@ static void apm_power_off(void) { - unsigned char po_bios_call[] = { + const unsigned char po_bios_call[] = { 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ 0x8e, 0xd0, /* movw ax,ss */ 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/asm-offsets.c linux-2.6.19.2/arch/i386/kernel/asm-offsets.c --- linux-2.6.19.2.orig/arch/i386/kernel/asm-offsets.c 2007-01-10 19:10:37.000000000 +0000 +++ 
linux-2.6.19.2/arch/i386/kernel/asm-offsets.c 2007-01-12 23:27:21.000000000 +0000 @@ -71,6 +71,7 @@ sizeof(struct tss_struct)); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(PTRS_PER_PTE_asm, PTRS_PER_PTE); DEFINE(VDSO_PRELINK, VDSO_PRELINK); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/cpu/common.c linux-2.6.19.2/arch/i386/kernel/cpu/common.c --- linux-2.6.19.2.orig/arch/i386/kernel/cpu/common.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/cpu/common.c 2007-01-12 23:27:21.000000000 +0000 @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -21,16 +20,18 @@ #include "cpu.h" -DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); -EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); - DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); static int cachesize_override __cpuinitdata = -1; static int disable_x86_fxsr __cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_KERNEXEC) +static int disable_x86_sep __cpuinitdata = 1; +#else static int disable_x86_sep __cpuinitdata; +#endif struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -600,11 +601,10 @@ void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); + struct tss_struct * t = init_tss + cpu; struct thread_struct *thread = ¤t->thread; - struct desc_struct *gdt; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -621,36 +621,12 @@ set_in_cr4(X86_CR4_TSD); } - /* The CPU hotplug case */ - if (cpu_gdt_descr->address) { - gdt = (struct desc_struct *)cpu_gdt_descr->address; - 
memset(gdt, 0, PAGE_SIZE); - goto old_gdt; - } - /* - * This is a horrible hack to allocate the GDT. The problem - * is that cpu_init() is called really early for the boot CPU - * (and hence needs bootmem) but much later for the secondary - * CPUs, when bootmem will have gone away - */ - if (NODE_DATA(0)->bdata->node_bootmem_map) { - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); - /* alloc_bootmem_pages panics on failure, so no check */ - memset(gdt, 0, PAGE_SIZE); - } else { - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); - if (unlikely(!gdt)) { - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); - for (;;) - local_irq_enable(); - } - } -old_gdt: /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - memcpy(gdt, cpu_gdt_table, GDT_SIZE); + if (cpu) + memcpy(gdt, cpu_gdt_table, GDT_SIZE); /* Set up GDT entry for 16bit stack */ *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= @@ -658,10 +634,10 @@ ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | (CPU_16BIT_STACK_SIZE - 1); - cpu_gdt_descr->size = GDT_SIZE - 1; - cpu_gdt_descr->address = (unsigned long)gdt; + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; + cpu_gdt_descr[cpu].address = (unsigned long)gdt; - load_gdt(cpu_gdt_descr); + load_gdt(&cpu_gdt_descr[cpu]); load_idt(&idt_descr); /* diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/crash.c linux-2.6.19.2/arch/i386/kernel/crash.c --- linux-2.6.19.2.orig/arch/i386/kernel/crash.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/crash.c 2007-01-12 23:27:21.000000000 +0000 @@ -117,7 +117,7 @@ return NOTIFY_STOP; local_irq_disable(); - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/doublefault.c linux-2.6.19.2/arch/i386/kernel/doublefault.c --- linux-2.6.19.2.orig/arch/i386/kernel/doublefault.c 2007-01-10 19:10:37.000000000 +0000 +++ 
linux-2.6.19.2/arch/i386/kernel/doublefault.c 2007-01-12 23:27:21.000000000 +0000 @@ -11,7 +11,7 @@ #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) +#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE-2) #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000) @@ -57,10 +57,10 @@ .eip = (unsigned long) doublefault_fn, .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */ .esp = STACK_START, - .es = __USER_DS, + .es = __KERNEL_DS, .cs = __KERNEL_CS, .ss = __KERNEL_DS, - .ds = __USER_DS, + .ds = __KERNEL_DS, .__cr3 = __pa(swapper_pg_dir) }; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/efi.c linux-2.6.19.2/arch/i386/kernel/efi.c --- linux-2.6.19.2.orig/arch/i386/kernel/efi.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/efi.c 2007-01-12 23:27:21.000000000 +0000 @@ -63,82 +63,43 @@ static unsigned long efi_rt_eflags; static DEFINE_SPINLOCK(efi_rt_lock); -static pgd_t efi_bak_pg_dir_pointer[2]; +static pgd_t __initdata efi_bak_pg_dir_pointer[KERNEL_PGD_PTRS] __attribute__ ((aligned (4096))); static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) { - unsigned long cr4; - unsigned long temp; - struct Xgt_desc_struct *cpu_gdt_descr; - spin_lock(&efi_rt_lock); local_irq_save(efi_rt_eflags); - cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - - /* - * If I don't have PSE, I should just duplicate two entries in page - * directory. If I have PSE, I just need to duplicate one entry in - * page directory. 
- */ - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - swapper_pg_dir[0].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - } else { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - efi_bak_pg_dir_pointer[1].pgd = - swapper_pg_dir[pgd_index(0x400000)].pgd; - swapper_pg_dir[pgd_index(0)].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - temp = PAGE_OFFSET + 0x400000; - swapper_pg_dir[pgd_index(0x400000)].pgd = - swapper_pg_dir[pgd_index(temp)].pgd; - } + clone_pgd_range(efi_bak_pg_dir_pointer, swapper_pg_dir, KERNEL_PGD_PTRS); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + USER_PGD_PTRS >= KERNEL_PGD_PTRS ? KERNEL_PGD_PTRS : USER_PGD_PTRS); /* * After the lock is released, the original page table is restored. */ - local_flush_tlb(); + __flush_tlb_all(); - cpu_gdt_descr->address = __pa(cpu_gdt_descr->address); - load_gdt(cpu_gdt_descr); + cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); + load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])); } static void efi_call_phys_epilog(void) __releases(efi_rt_lock) { - unsigned long cr4; - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - - cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); - load_gdt(cpu_gdt_descr); + cpu_gdt_descr[0].address = (unsigned long) __va(cpu_gdt_descr[0].address); + load_gdt(&cpu_gdt_descr[0]); - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - } else { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - swapper_pg_dir[pgd_index(0x400000)].pgd = - efi_bak_pg_dir_pointer[1].pgd; - } + clone_pgd_range(swapper_pg_dir, efi_bak_pg_dir_pointer, KERNEL_PGD_PTRS); /* * After the lock is released, the original page table is restored. 
*/ - local_flush_tlb(); + __flush_tlb_all(); local_irq_restore(efi_rt_eflags); spin_unlock(&efi_rt_lock); } -static efi_status_t +static efi_status_t __init phys_efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, u32 descriptor_version, @@ -154,7 +115,7 @@ return status; } -static efi_status_t +static efi_status_t __init phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { efi_status_t status; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/efi_stub.S linux-2.6.19.2/arch/i386/kernel/efi_stub.S --- linux-2.6.19.2.orig/arch/i386/kernel/efi_stub.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/efi_stub.S 2007-01-12 23:27:21.000000000 +0000 @@ -6,6 +6,7 @@ */ #include +#include #include /* @@ -20,7 +21,7 @@ * service functions will comply with gcc calling convention, too. */ -.text +__INIT ENTRY(efi_call_phys) /* * 0. The function can only be called in Linux kernel. So CS has been @@ -36,9 +37,7 @@ * The mapping of lower virtual memory has been created in prelog and * epilog. */ - movl $1f, %edx - subl $__PAGE_OFFSET, %edx - jmp *%edx + jmp 1f-__PAGE_OFFSET 1: /* @@ -47,14 +46,8 @@ * parameter 2, ..., param n. To make things easy, we save the return * address of efi_call_phys in a global variable. */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx + popl (saved_return_addr) + popl (efi_rt_function_ptr) /* * 3. Clear PG bit in %CR0. @@ -73,9 +66,8 @@ /* * 5. Call the physical function. */ - jmp *%ecx + call *(efi_rt_function_ptr-__PAGE_OFFSET) -2: /* * 6. After EFI runtime service returns, control will return to * following instruction. We'd better readjust stack pointer first. @@ -85,37 +77,29 @@ /* * 7. Restore PG bit */ - movl %cr0, %edx - orl $0x80000000, %edx - movl %edx, %cr0 - jmp 1f -1: /* * 8. 
Now restore the virtual mode from flat mode by * adding EIP with PAGE_OFFSET. */ - movl $1f, %edx - jmp *%edx + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 + jmp 1f+__PAGE_OFFSET 1: /* * 9. Balance the stack. And because EAX contain the return value, * we'd better not clobber it. */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx + pushl (efi_rt_function_ptr) /* - * 10. Push the saved return address onto the stack and return. + * 10. Return to the saved return address. */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret + jmpl *(saved_return_addr) .previous -.data +__INITDATA saved_return_addr: .long 0 efi_rt_function_ptr: diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/entry.S linux-2.6.19.2/arch/i386/kernel/entry.S --- linux-2.6.19.2.orig/arch/i386/kernel/entry.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/entry.S 2007-01-12 23:27:21.000000000 +0000 @@ -82,6 +82,8 @@ #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit #define INTERRUPT_RETURN iret #define GET_CR0_INTO_EAX movl %cr0, %eax +#define GET_CR0_INTO_EDX movl %cr0, %edx +#define SET_CR0_FROM_EDX movl %edx, %cr0 #ifdef CONFIG_PREEMPT #define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF @@ -105,7 +107,7 @@ #define resume_userspace_sig resume_userspace #endif -#define SAVE_ALL \ +#define __SAVE_ALL(_DS) \ cld; \ pushl %es; \ CFI_ADJUST_CFA_OFFSET 4;\ @@ -134,10 +136,24 @@ pushl %ebx; \ CFI_ADJUST_CFA_OFFSET 4;\ CFI_REL_OFFSET ebx, 0;\ - movl $(__USER_DS), %edx; \ + movl $(_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; +#ifdef CONFIG_PAX_KERNEXEC +#define SAVE_ALL \ + __SAVE_ALL(__KERNEL_DS) \ + GET_CR0_INTO_EDX; \ + movl %edx, %esi; \ + orl $0x10000, %edx; \ + xorl %edx, %esi; \ + SET_CR0_FROM_EDX +#elif defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) +#define SAVE_ALL __SAVE_ALL(__KERNEL_DS) +#else +#define SAVE_ALL __SAVE_ALL(__USER_DS) +#endif + #define RESTORE_INT_REGS \ popl 
%ebx; \ CFI_ADJUST_CFA_OFFSET -4;\ @@ -245,7 +261,18 @@ movb CS(%esp), %al andl $(VM_MASK | SEGMENT_RPL_MASK), %eax cmpl $USER_RPL, %eax + +#ifdef CONFIG_PAX_KERNEXEC + jae resume_userspace + + GET_CR0_INTO_EDX + xorl %esi, %edx + SET_CR0_FROM_EDX + jmp resume_kernel +#else jb resume_kernel # not returning to v8086 or userspace +#endif + ENTRY(resume_userspace) DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending @@ -301,10 +328,9 @@ /*CFI_REL_OFFSET cs, 0*/ /* * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary - 4*4 means the 4 words - * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) + GET_THREAD_INFO(%ebp) + pushl TI_sysenter_return(%ebp) CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 @@ -312,9 +338,20 @@ * Load the potential sixth argument from user stack. * Careful about security. */ + movl 12(%esp),%ebp + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl $(__USER_DS) + CFI_ADJUST_CFA_OFFSET 4 + pop %ds + CFI_ADJUST_CFA_OFFSET -4 +1: movl %ds:(%ebp),%ebp +#else cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault 1: movl (%ebp),%ebp +#endif + .section __ex_table,"a" .align 4 .long 1b,syscall_fault @@ -337,14 +374,36 @@ movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_PAX_RANDKSTACK + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + call pax_randomize_kstack + popl %eax + CFI_ADJUST_CFA_OFFSET -4 +#endif + /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON +1: mov DS(%esp), %ds +2: mov ES(%esp), %es ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC +.section .fixup,"ax" +3: movl $0,DS(%esp) + jmp 1b +4: movl $0,ES(%esp) + jmp 2b +.previous +.section __ex_table,"a" + .align 4 + .long 1b,3b + .long 2b,4b +.previous # system call handler stub ENTRY(system_call) @@ -375,6 +434,10 @@ testw $_TIF_ALLWORK_MASK, 
%cx # current->work jne syscall_exit_work +#ifdef CONFIG_PAX_RANDKSTACK + call pax_randomize_kstack +#endif + restore_all: movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS # Warning: OLDSS(%esp) contains the wrong/random values if we @@ -553,7 +616,7 @@ * Build the entry stubs and pointer table with * some assembler magic. */ -.data +.section .rodata,"a",@progbits ENTRY(interrupt) .text @@ -568,7 +631,7 @@ 1: pushl $~(vector) CFI_ADJUST_CFA_OFFSET 4 jmp common_interrupt -.data +.section .rodata,"a",@progbits .long 1b .text vector=vector+1 @@ -642,12 +705,21 @@ popl %ecx CFI_ADJUST_CFA_OFFSET -4 /*CFI_REGISTER es, ecx*/ + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_EDX + movl %edx, %esi + orl $0x10000, %edx + xorl %edx, %esi + SET_CR0_FROM_EDX +#endif + movl ES(%esp), %edi # get the function address movl ORIG_EAX(%esp), %edx # get the error code movl %eax, ORIG_EAX(%esp) movl %ecx, ES(%esp) /*CFI_REL_OFFSET es, ES*/ - movl $(__USER_DS), %ecx + movl $(__KERNEL_DS), %ecx movl %ecx, %ds movl %ecx, %es movl %esp,%eax # pt_regs pointer @@ -778,6 +850,13 @@ xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_EDX + xorl %esi, %edx + SET_CR0_FROM_EDX +#endif + jmp restore_nocheck_notrace CFI_ENDPROC @@ -820,6 +899,13 @@ CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved xorl %edx,%edx # zero error code call do_nmi + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_EDX + xorl %esi, %edx + SET_CR0_FROM_EDX +#endif + RESTORE_REGS lss 12+4(%esp), %esp # back to 16bit stack 1: INTERRUPT_RETURN @@ -957,8 +1043,8 @@ movl %edi, EDI(%edx) movl %ebp, EBP(%edx) movl %ebx, EAX(%edx) - movl $__USER_DS, DS(%edx) - movl $__USER_DS, ES(%edx) + movl $__KERNEL_DS, DS(%edx) + movl $__KERNEL_DS, ES(%edx) movl %ebx, ORIG_EAX(%edx) movl %ecx, EIP(%edx) movl 12(%esp), %ecx @@ -987,7 +1073,6 @@ CFI_ENDPROC ENDPROC(kernel_thread_helper) -.section .rodata,"a" #include "syscall_table.S" syscall_table_size=(.-sys_call_table) diff -Naur 
linux-2.6.19.2.orig/arch/i386/kernel/head.S linux-2.6.19.2/arch/i386/kernel/head.S --- linux-2.6.19.2.orig/arch/i386/kernel/head.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/head.S 2007-01-12 23:27:21.000000000 +0000 @@ -45,6 +45,16 @@ */ #define INIT_MAP_BEYOND_END (128*1024) +#ifdef CONFIG_PAX_KERNEXEC +/* PaX: fill first page in .text with int3 to catch NULL derefs in kernel mode */ +.fill 4096,1,0xcc +#endif + +/* + * Real beginning of normal "text" segment + */ +ENTRY(stext) +ENTRY(_stext) /* * 32-bit kernel entrypoint; only used by the boot CPU. On entry, @@ -66,6 +76,36 @@ movl %eax,%fs movl %eax,%gs +#ifdef CONFIG_PAX_MEMORY_UDEREF + /* check for VMware */ + movl $0x564d5868,%eax + xorl %ebx,%ebx + movl $0xa,%ecx + movl $0x5658,%edx + in (%dx),%eax + cmpl $0x564d5868,%ebx + jz 1f + + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c09700),%eax + movl %eax,(cpu_gdt_table - __PAGE_OFFSET + GDT_ENTRY_KERNEL_DS * 8 + 4) + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c0f300),%eax + movl %eax,(cpu_gdt_table - __PAGE_OFFSET + GDT_ENTRY_DEFAULT_USER_DS * 8 + 4) +1: +#endif + +#ifdef CONFIG_PAX_KERNEXEC + movl $ __KERNEL_TEXT_OFFSET,%eax + movw %ax,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 2) + rorl $16,%eax + movb %al,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 4) + movb %ah,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 7) + + movb %al,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 4) + movb %ah,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 7) + rorl $16,%eax + movw %ax,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 2) +#endif + /* * Clear BSS first so that there are no surprises... * No need to cld as DF is already clear from cld above... @@ -113,24 +153,42 @@ * Warning: don't use %esi or the stack in this code. However, %esp * can be used as a GPR if you really need it... 
*/ -page_pde_offset = (__PAGE_OFFSET >> 20); - +#ifdef CONFIG_X86_PAE +page_pde_offset = ((__PAGE_OFFSET >> 21) * (4096 / PTRS_PER_PTE_asm)); +#else +page_pde_offset = ((__PAGE_OFFSET >> 22) * (4096 / PTRS_PER_PTE_asm)); +#endif movl $(pg0 - __PAGE_OFFSET), %edi +#ifdef CONFIG_X86_PAE + movl $(swapper_pm_dir - __PAGE_OFFSET), %edx +#else movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ +#endif + movl $0x063, %eax /* 0x063 = DIRTY+ACCESSED+PRESENT+RW */ 10: - leal 0x007(%edi),%ecx /* Create PDE entry */ + leal 0x063(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ +#ifdef CONFIG_X86_PAE + movl $0,4(%edx) + movl $0,page_pde_offset+4(%edx) + addl $8,%edx + movl $512, %ecx +#else addl $4,%edx movl $1024, %ecx +#endif 11: stosl +#ifdef CONFIG_X86_PAE + movl $0,(%edi) + addl $4,%edi +#endif addl $0x1000,%eax loop 11b /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ - /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ - leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp + /* bytes beyond the end of our own page tables; the +0x063 is the attribute bits */ + leal (INIT_MAP_BEYOND_END+0x063)(%edi),%ebp cmpl %ebp,%eax jb 10b movl %edi,(init_pg_tables_end - __PAGE_OFFSET) @@ -153,6 +211,11 @@ movl %eax,%fs movl %eax,%gs + /* This is a secondary processor (AP) */ + xorl %ebx,%ebx + incl %ebx +#endif /* CONFIG_SMP */ + /* * New page tables may be in 4Mbyte page mode and may * be using the global pages. @@ -168,26 +231,27 @@ * not yet offset PAGE_OFFSET.. */ #define cr4_bits mmu_cr4_features-__PAGE_OFFSET +3: movl cr4_bits,%edx andl %edx,%edx - jz 6f + jz 5f movl %cr4,%eax # Turn on paging options (PSE,PAE,..) 
orl %edx,%eax movl %eax,%cr4 - btl $5, %eax # check if PAE is enabled - jnc 6f +#ifdef CONFIG_X86_PAE + movl %ebx,%edi /* Check if extended functions are implemented */ movl $0x80000000, %eax cpuid cmpl $0x80000000, %eax - jbe 6f + jbe 4f mov $0x80000001, %eax cpuid /* Execute Disable bit supported? */ btl $20, %edx - jnc 6f + jnc 4f /* Setup EFER (Extended Feature Enable Register) */ movl $0xc0000080, %ecx @@ -196,14 +260,12 @@ btsl $11, %eax /* Make changes effective */ wrmsr + btsl $63,__supported_pte_mask-__PAGE_OFFSET -6: - /* This is a secondary processor (AP) */ - xorl %ebx,%ebx - incl %ebx - -3: -#endif /* CONFIG_SMP */ +4: + movl %edi,%ebx +#endif +5: /* * Enable paging @@ -228,9 +290,7 @@ #ifdef CONFIG_SMP andl %ebx,%ebx - jz 1f /* Initial CPU cleans BSS */ - jmp checkCPUtype -1: + jnz checkCPUtype /* Initial CPU cleans BSS */ #endif /* CONFIG_SMP */ /* @@ -307,8 +367,6 @@ ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. 
- - movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds movl %eax,%es @@ -433,8 +491,8 @@ /* This is the default interrupt "handler" :-) */ ALIGN ignore_int: - cld #ifdef CONFIG_PRINTK + cld pushl %eax pushl %ecx pushl %edx @@ -465,32 +523,50 @@ #endif iret -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - -/* - * BSS section - */ -.section ".bss.page_aligned","w" +.section .swapper_pg_dir,"a",@progbits ENTRY(swapper_pg_dir) +#ifdef CONFIG_X86_PAE + .long swapper_pm_dir-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*8-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*16-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*24-__PAGE_OFFSET+1 + .long 0 +#else .fill 1024,4,0 +#endif + +#ifdef CONFIG_X86_PAE +.section .swapper_pm_dir,"a",@progbits +ENTRY(swapper_pm_dir) + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 +#endif + +.section .empty_zero_page,"a",@progbits ENTRY(empty_zero_page) .fill 4096,1,0 /* - * This starts the data section. - */ -.data + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. + */ +.section .idt,"a",@progbits +ENTRY(idt_table) + .fill 256,8,0 + +.section .rodata,"a",@progbits +ready: .byte 0 ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE + .long init_thread_union+THREAD_SIZE-8 .long __BOOT_DS -ready: .byte 0 - early_recursion_flag: .long 0 @@ -525,10 +601,12 @@ # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address -cpu_gdt_descr: +ENTRY(cpu_gdt_descr) .word GDT_ENTRIES*8-1 .long cpu_gdt_table + .fill NR_CPUS*8-6,1,0 # space for the other GDT descriptors + /* * The boot_gdt_table must mirror the equivalent in setup.S and is * used only for booting. 
@@ -536,13 +614,13 @@ .align L1_CACHE_BYTES ENTRY(boot_gdt_table) .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* kernel 4GB data at 0x00000000 */ /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ - .align L1_CACHE_BYTES + .align PAGE_SIZE_asm ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ @@ -557,10 +635,10 @@ .quad 0x0000000000000000 /* 0x53 reserved */ .quad 0x0000000000000000 /* 0x5b reserved */ - .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */ + .quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ @@ -570,24 +648,30 @@ * They code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. 
*/ - .quad 0x00409a000000ffff /* 0x90 32-bit code */ - .quad 0x00009a000000ffff /* 0x98 16-bit code */ - .quad 0x000092000000ffff /* 0xa0 16-bit data */ - .quad 0x0000920000000000 /* 0xa8 16-bit data */ - .quad 0x0000920000000000 /* 0xb0 16-bit data */ + .quad 0x00409b000000ffff /* 0x90 32-bit code */ + .quad 0x00009b000000ffff /* 0x98 16-bit code */ + .quad 0x000093000000ffff /* 0xa0 16-bit data */ + .quad 0x0000930000000000 /* 0xa8 16-bit data */ + .quad 0x0000930000000000 /* 0xb0 16-bit data */ /* * The APM segments have byte granularity and their bases * are set at run time. All have 64k limits. */ - .quad 0x00409a000000ffff /* 0xb8 APM CS code */ - .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ - .quad 0x004092000000ffff /* 0xc8 APM DS data */ + .quad 0x00409b000000ffff /* 0xb8 APM CS code */ + .quad 0x00009b000000ffff /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x004093000000ffff /* 0xc8 APM DS data */ - .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ + .quad 0x0000930000000000 /* 0xd0 - ESPFIX 16-bit SS */ .quad 0x0000000000000000 /* 0xd8 - unused */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ + /* Be sure this is zeroed to avoid false validations in Xen */ + .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0 + +#ifdef CONFIG_SMP + .fill (NR_CPUS-1) * (PAGE_SIZE_asm / 8),8,0 /* other CPU's GDT */ +#endif diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/i386_ksyms.c linux-2.6.19.2/arch/i386/kernel/i386_ksyms.c --- linux-2.6.19.2.orig/arch/i386/kernel/i386_ksyms.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/i386_ksyms.c 2007-01-12 23:27:21.000000000 +0000 @@ -2,12 +2,16 @@ #include #include +EXPORT_SYMBOL_GPL(cpu_gdt_table); + EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* 
Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(csum_partial_copy_generic_to_user); +EXPORT_SYMBOL(csum_partial_copy_generic_from_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/init_task.c linux-2.6.19.2/arch/i386/kernel/init_task.c --- linux-2.6.19.2.orig/arch/i386/kernel/init_task.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/init_task.c 2007-01-12 23:27:21.000000000 +0000 @@ -42,5 +42,5 @@ * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +struct tss_struct init_tss[NR_CPUS] ____cacheline_internodealigned_in_smp = { [0 ... NR_CPUS-1] = INIT_TSS }; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/io_apic.c linux-2.6.19.2/arch/i386/kernel/io_apic.c --- linux-2.6.19.2.orig/arch/i386/kernel/io_apic.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/io_apic.c 2007-01-12 23:27:21.000000000 +0000 @@ -350,8 +350,8 @@ # define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) # define Dprintk(x...) do { TDprintk(x); } while (0) # else -# define TDprintk(x...) -# define Dprintk(x...) +# define TDprintk(x...) do {} while (0) +# define Dprintk(x...) do {} while (0) # endif #define IRQBALANCE_CHECK_ARCH -999 diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/ioport.c linux-2.6.19.2/arch/i386/kernel/ioport.c --- linux-2.6.19.2.orig/arch/i386/kernel/ioport.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/ioport.c 2007-01-12 23:27:21.000000000 +0000 @@ -16,6 +16,7 @@ #include #include #include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. 
*/ static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) @@ -64,9 +65,16 @@ if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on) { + gr_handle_ioperm(); +#else if (turn_on && !capable(CAP_SYS_RAWIO)) +#endif return -EPERM; - +#ifdef CONFIG_GRKERNSEC_IO + } +#endif /* * If it's the first ioperm() call in this thread's lifetime, set the * IO bitmap up. ioperm() is much less timing critical than clone(), @@ -89,7 +97,7 @@ * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); @@ -143,8 +151,13 @@ return -EINVAL; /* Trying to gain more privileges? */ if (level > old) { +#ifdef CONFIG_GRKERNSEC_IO + gr_handle_iopl(); + return -EPERM; +#else if (!capable(CAP_SYS_RAWIO)) return -EPERM; +#endif } t->iopl = level << 12; regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/irq.c linux-2.6.19.2/arch/i386/kernel/irq.c --- linux-2.6.19.2.orig/arch/i386/kernel/irq.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/irq.c 2007-01-12 23:27:21.000000000 +0000 @@ -100,7 +100,7 @@ int arg1, arg2, ebx; /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)) - 2; irqctx->tinfo.task = curctx->tinfo.task; irqctx->tinfo.previous_esp = current_stack_pointer; @@ -137,10 +137,10 @@ * gcc's 3.0 and earlier don't handle that correctly. 
*/ static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); /* * allocate per-cpu stacks for hardirq and for softirq processing @@ -200,7 +200,7 @@ irqctx->tinfo.previous_esp = current_stack_pointer; /* build the stack frame on the softirq stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)) - 2; asm volatile( " xchgl %%ebx,%%esp \n" diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/kprobes.c linux-2.6.19.2/arch/i386/kernel/kprobes.c --- linux-2.6.19.2.orig/arch/i386/kernel/kprobes.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/kprobes.c 2007-01-12 23:27:21.000000000 +0000 @@ -661,7 +661,7 @@ struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - if (args->regs && user_mode_vm(args->regs)) + if (args->regs && user_mode(args->regs)) return ret; switch (val) { diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/ldt.c linux-2.6.19.2/arch/i386/kernel/ldt.c --- linux-2.6.19.2.orig/arch/i386/kernel/ldt.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/ldt.c 2007-01-12 23:27:21.000000000 +0000 @@ -20,6 +20,9 @@ #include #include +const struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 } }; + #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) { @@ -103,6 +106,22 @@ retval = copy_ldt(&mm->context, &old_mm->context); up(&old_mm->context.sem); } + + if (tsk == current) { + mm->context.vdso = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + mm->context.user_cs_base = 0UL; + mm->context.user_cs_limit = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + 
cpus_clear(mm->context.cpu_user_cs_mask); +#endif + +#endif + + } + return retval; } @@ -160,11 +179,11 @@ { int err; unsigned long size; - void *address; + const void *address; err = 0; address = &default_ldt[0]; - size = 5*sizeof(struct desc_struct); + size = sizeof default_ldt; if (size > bytecount) size = bytecount; @@ -215,6 +234,13 @@ } } +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & MODIFY_LDT_CONTENTS_CODE)) { + error = -EINVAL; + goto out_unlock; + } +#endif + entry_1 = LDT_entry_a(&ldt_info); entry_2 = LDT_entry_b(&ldt_info); if (oldmode) diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/module.c linux-2.6.19.2/arch/i386/kernel/module.c --- linux-2.6.19.2.orig/arch/i386/kernel/module.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/module.c 2007-01-12 23:27:21.000000000 +0000 @@ -21,6 +21,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -32,9 +33,30 @@ { if (size == 0) return NULL; + +#ifdef CONFIG_PAX_KERNEXEC + return vmalloc(size); +#else return vmalloc_exec(size); +#endif + } +#ifdef CONFIG_PAX_KERNEXEC +void *module_alloc_exec(unsigned long size) +{ + struct vm_struct *area; + + if (size == 0) + return NULL; + + area = __get_vm_area(size, 0, (unsigned long)&MODULES_VADDR, (unsigned long)&MODULES_END); + if (area) + return area->addr; + + return NULL; +} +#endif /* Free memory returned from module_alloc */ void module_free(struct module *mod, void *module_region) @@ -44,6 +66,45 @@ table entries. 
*/ } +#ifdef CONFIG_PAX_KERNEXEC +void module_free_exec(struct module *mod, void *module_region) +{ + struct vm_struct **p, *tmp; + + if (!module_region) + return; + + if ((PAGE_SIZE-1) & (unsigned long)module_region) { + printk(KERN_ERR "Trying to module_free_exec() bad address (%p)\n", module_region); + WARN_ON(1); + return; + } + + write_lock(&vmlist_lock); + for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) + if (tmp->addr == module_region) + break; + + if (tmp) { + unsigned long cr0; + + pax_open_kernel(cr0); + memset(tmp->addr, 0xCC, tmp->size); + pax_close_kernel(cr0); + + *p = tmp->next; + kfree(tmp); + } + write_unlock(&vmlist_lock); + + if (!tmp) { + printk(KERN_ERR "Trying to module_free_exec() nonexistent vm area (%p)\n", + module_region); + WARN_ON(1); + } +} +#endif + /* We don't need anything special. */ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -62,14 +123,16 @@ unsigned int i; Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; Elf32_Sym *sym; - uint32_t *location; + uint32_t *plocation, location; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; + plocation = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; + location = (uint32_t)plocation; + if (sechdrs[sechdrs[relsec].sh_info].sh_flags & SHF_EXECINSTR) + plocation = (void *)plocation + __KERNEL_TEXT_OFFSET; /* This is the symbol it is referring to. Note that all undefined symbols have been resolved. 
*/ sym = (Elf32_Sym *)sechdrs[symindex].sh_addr @@ -78,11 +141,11 @@ switch (ELF32_R_TYPE(rel[i].r_info)) { case R_386_32: /* We add the value into the location given */ - *location += sym->st_value; + *plocation += sym->st_value; break; case R_386_PC32: /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; + *plocation += sym->st_value - location; break; default: printk(KERN_ERR "module %s: Unknown relocation: %u\n", diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/process.c linux-2.6.19.2/arch/i386/kernel/process.c --- linux-2.6.19.2.orig/arch/i386/kernel/process.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/process.c 2007-01-12 23:27:21.000000000 +0000 @@ -69,7 +69,7 @@ */ unsigned long thread_saved_pc(struct task_struct *tsk) { - return ((unsigned long *)tsk->thread.esp)[3]; + return tsk->thread.eip; } /* @@ -309,7 +309,7 @@ printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode_vm(regs)) + if (user_mode(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s %.*s)\n", regs->eflags, print_tainted(), init_utsname()->release, @@ -349,8 +349,8 @@ regs.ebx = (unsigned long) fn; regs.edx = (unsigned long) arg; - regs.xds = __USER_DS; - regs.xes = __USER_DS; + regs.xds = __KERNEL_DS; + regs.xes = __KERNEL_DS; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS | get_kernel_rpl(); @@ -371,7 +371,7 @@ struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; @@ -392,6 +392,9 @@ { struct task_struct *tsk = current; + __asm__("mov %0,%%fs\n" + "mov %0,%%gs\n" + : : "r" (0) : "memory"); memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); 
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); clear_tsk_thread_flag(tsk, TIF_DEBUG); @@ -425,7 +428,7 @@ struct task_struct *tsk; int err; - childregs = task_pt_regs(p); + childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 8; *childregs = *regs; childregs->eax = 0; childregs->esp = esp; @@ -468,6 +471,11 @@ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) goto out; +#ifdef CONFIG_PAX_SEGMEXEC + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + goto out; +#endif + desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; desc->a = LDT_entry_a(&info); desc->b = LDT_entry_b(&info); @@ -647,7 +655,11 @@ struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -670,11 +682,24 @@ savesegment(fs, prev->fs); savesegment(gs, prev->gs); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!segment_eq(prev_p->thread_info->addr_limit, next_p->thread_info->addr_limit)) + __set_fs(next_p->thread_info->addr_limit, cpu); +#endif + /* * Load the per-thread Thread-Local Storage descriptor. */ load_TLS(next, cpu); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + /* * Restore %fs and %gs if needed. 
* @@ -819,8 +844,18 @@ struct desc_struct *desc; int cpu, idx; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; + +#ifdef CONFIG_PAX_SEGMEXEC + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + idx = info.entry_number; /* @@ -852,8 +887,17 @@ desc->a = LDT_entry_a(&info); desc->b = LDT_entry_b(&info); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + load_TLS(t, cpu); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); return 0; @@ -909,9 +953,27 @@ return 0; } -unsigned long arch_align_stack(unsigned long sp) +#ifdef CONFIG_PAX_RANDKSTACK +asmlinkage void pax_randomize_kstack(void) { - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; + struct tss_struct *tss = init_tss + smp_processor_id(); + unsigned long time; + + if (!randomize_va_space) + return; + + rdtscl(time); + + /* P4 seems to return a 0 LSB, ignore it */ +#ifdef CONFIG_MPENTIUM4 + time &= 0x1EUL; + time <<= 2; +#else + time &= 0xFUL; + time <<= 3; +#endif + + tss->esp0 ^= time; + current->thread.esp0 = tss->esp0; } +#endif diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/ptrace.c linux-2.6.19.2/arch/i386/kernel/ptrace.c --- linux-2.6.19.2.orig/arch/i386/kernel/ptrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/ptrace.c 2007-01-12 23:27:21.000000000 +0000 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -169,15 +170,15 @@ * and APM bios ones we just ignore here. 
*/ if (seg & LDT_SEGMENT) { - u32 *desc; + struct desc_struct *desc; unsigned long base; down(&child->mm->context.sem); - desc = child->mm->context.ldt + (seg & ~7); - base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000); + desc = &child->mm->context.ldt[seg >> 3]; + base = (desc->a >> 16) | ((desc->b & 0xff) << 16) | (desc->b & 0xff000000); /* 16-bit code segment? */ - if (!((desc[1] >> 22) & 1)) + if (!((desc->b >> 22) & 1)) addr &= 0xffff; addr += base; up(&child->mm->context.sem); @@ -342,6 +343,11 @@ if (copy_from_user(&info, user_desc, sizeof(info))) return -EFAULT; +#ifdef CONFIG_PAX_SEGMEXEC + if ((child->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; @@ -432,6 +438,17 @@ if(addr == (long) &dummy->u_debugreg[5]) break; if(addr < (long) &dummy->u_debugreg[4] && ((unsigned long) data) >= TASK_SIZE-3) break; + +#ifdef CONFIG_GRKERNSEC + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[3]){ + long reg = (addr - (long) &dummy->u_debugreg[0]) >> 2; + long type = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 4*reg)) & 3; + long align = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 2 + 4*reg)) & 3; + if((type & 1) && (data & align)) + break; + } +#endif /* Sanity-check data. Take one half-byte at once with * check = (val >> (16 + 4*i)) & 0xf. It contains the @@ -648,7 +665,7 @@ info.si_code = TRAP_BRKPT; /* User-mode eip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; + info.si_addr = user_mode(regs) ? 
(void __user *) regs->eip : NULL; /* Send us the fakey SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/reboot.c linux-2.6.19.2/arch/i386/kernel/reboot.c --- linux-2.6.19.2.orig/arch/i386/kernel/reboot.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/reboot.c 2007-01-12 23:27:21.000000000 +0000 @@ -24,7 +24,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static int reboot_mode; +static unsigned short reboot_mode; static int reboot_thru_bios; #ifdef CONFIG_SMP @@ -137,15 +137,15 @@ doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. */ -static unsigned long long +static const unsigned long long real_mode_gdt_entries [3] = { 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct Xgt_desc_struct +static const struct Xgt_desc_struct real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, real_mode_idt = { 0x3ff, 0 }, no_idt = { 0, 0 }; @@ -170,7 +170,7 @@ More could be done here to set up the registers as if a CPU reset had occurred; hopefully real BIOSs don't assume much. */ -static unsigned char real_mode_switch [] = +static const unsigned char real_mode_switch [] = { 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ @@ -184,7 +184,7 @@ 0x24, 0x10, /* f: andb $0x10,al */ 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ }; -static unsigned char jump_to_bios [] = +static const unsigned char jump_to_bios [] = { 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ }; @@ -194,10 +194,14 @@ * specified by the code and length parameters. 
* We assume that length will aways be less that 100! */ -void machine_real_restart(unsigned char *code, int length) +void machine_real_restart(const unsigned char *code, unsigned int length) { unsigned long flags; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST @@ -218,8 +222,16 @@ from the kernel segment. This assumes the kernel segment starts at virtual address PAGE_OFFSET. */ - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + USER_PGD_PTRS >= KERNEL_PGD_PTRS ? KERNEL_PGD_PTRS : USER_PGD_PTRS); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif /* * Use `swapper_pg_dir' as our page directory. @@ -232,7 +244,7 @@ REBOOT.COM programs, and the previous reset routine did this too. */ - *((unsigned short *)0x472) = reboot_mode; + __put_user(reboot_mode, (unsigned short __user *)0x472); /* For the switch to real mode, copy some code to low memory. It has to be in the first 64k because it is running in 16-bit mode, and it @@ -240,9 +252,9 @@ off paging. Copy it near the end of the first page, out of the way of BIOS variables. */ - memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100), + flags = __copy_to_user ((void __user *) (0x1000 - sizeof (real_mode_switch) - 100), real_mode_switch, sizeof (real_mode_switch)); - memcpy ((void *) (0x1000 - 100), code, length); + flags = __copy_to_user ((void __user *) (0x1000 - 100), code, length); /* Set up the IDT for real mode. 
*/ @@ -324,7 +336,7 @@ __asm__ __volatile__("int3"); } /* rebooting needs to touch the page at absolute addr 0 */ - *((unsigned short *)__va(0x472)) = reboot_mode; + __put_user(reboot_mode, (unsigned short __user *)0x472); for (;;) { mach_reboot_fixups(); /* for board specific fixups */ mach_reboot(); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/setup.c linux-2.6.19.2/arch/i386/kernel/setup.c --- linux-2.6.19.2.orig/arch/i386/kernel/setup.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/setup.c 2007-01-12 23:27:21.000000000 +0000 @@ -88,7 +88,11 @@ struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); +#ifdef CONFIG_X86_PAE +unsigned long mmu_cr4_features = X86_CR4_PAE; +#else unsigned long mmu_cr4_features; +#endif /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; @@ -1388,14 +1392,14 @@ if (!MOUNT_ROOT_RDONLY) root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; + init_mm.start_code = (unsigned long) _text + __KERNEL_TEXT_OFFSET; + init_mm.end_code = (unsigned long) _etext + __KERNEL_TEXT_OFFSET; init_mm.end_data = (unsigned long) _edata; init_mm.brk = init_pg_tables_end + PAGE_OFFSET; - code_resource.start = virt_to_phys(_text); - code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); + code_resource.start = virt_to_phys(_text + __KERNEL_TEXT_OFFSET); + code_resource.end = virt_to_phys(_etext + __KERNEL_TEXT_OFFSET)-1; + data_resource.start = virt_to_phys(_data); data_resource.end = virt_to_phys(_edata)-1; parse_early_param(); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/signal.c linux-2.6.19.2/arch/i386/kernel/signal.c --- linux-2.6.19.2.orig/arch/i386/kernel/signal.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/signal.c 2007-01-12 23:27:21.000000000 +0000 @@ -351,7 +351,7 @@ goto give_sigsegv; } - restorer = (void 
*)VDSO_SYM(&__kernel_sigreturn); + restorer = (void __user *)VDSO_SYM(&__kernel_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -447,7 +447,8 @@ goto give_sigsegv; /* Set up to return from userspace. */ - restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn); + + restorer = (void __user *)VDSO_SYM(&__kernel_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); @@ -580,7 +581,7 @@ * before reaching here, so testing against kernel * CS suffices. */ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) return; if (test_thread_flag(TIF_RESTORE_SIGMASK)) diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/smpboot.c linux-2.6.19.2/arch/i386/kernel/smpboot.c --- linux-2.6.19.2.orig/arch/i386/kernel/smpboot.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/smpboot.c 2007-01-12 23:27:21.000000000 +0000 @@ -1066,7 +1066,6 @@ struct warm_boot_cpu_info info; struct work_struct task; int apicid, ret; - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); apicid = x86_cpu_to_apicid[cpu]; if (apicid == BAD_APICID) { @@ -1078,13 +1077,7 @@ * the CPU isn't initialized at boot time, allocate gdt table here. 
* cpu_init will initialize it */ - if (!cpu_gdt_descr->address) { - cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL); - if (!cpu_gdt_descr->address) - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); - ret = -ENOMEM; - goto exit; - } + cpu_gdt_descr[cpu].address = get_cpu_gdt_table(cpu); info.complete = &done; info.apicid = apicid; @@ -1095,7 +1088,7 @@ /* init low mem mapping */ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); flush_tlb_all(); schedule_work(&task); wait_for_completion(&done); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/sys_i386.c linux-2.6.19.2/arch/i386/kernel/sys_i386.c --- linux-2.6.19.2.orig/arch/i386/kernel/sys_i386.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/sys_i386.c 2007-01-12 23:27:21.000000000 +0000 @@ -100,6 +100,191 @@ return err; } +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (len > task_size) + return -ENOMEM; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + } + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE) && start_addr >= mm->mmap_base) { + start_addr = 0x00110000UL; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags &
MF_PAX_RANDMMAP) + start_addr += mm->delta_mmap & 0x03FFF000UL; +#endif + + if (mm->start_brk <= start_addr && start_addr < mm->mmap_base) + start_addr = addr = mm->mmap_base; + else + addr = start_addr; + } +#endif + +full_search: + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (task_size - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + if (mm->start_brk <= addr && addr < mm->mmap_base) { + start_addr = addr = mm->mmap_base; + goto full_search; + } + } +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + /* requested length too big for entire address space */ + if (len > task_size) + return -ENOMEM; + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE)) + goto bottomup; +#endif + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + /* check if free_area_cache is useful for 
us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + + if (mm->mmap_base < len) + goto bottomup; + + addr = mm->mmap_base-len; + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (!vma || addr+len <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + } while (len < vma->vm_start); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
+ */ + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->mmap_base = base; + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +} struct sel_arg_struct { unsigned long n; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/syscall_table.S linux-2.6.19.2/arch/i386/kernel/syscall_table.S --- linux-2.6.19.2.orig/arch/i386/kernel/syscall_table.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/syscall_table.S 2007-01-12 23:27:21.000000000 +0000 @@ -1,3 +1,4 @@ +.section .rodata,"a",@progbits ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/sysenter.c linux-2.6.19.2/arch/i386/kernel/sysenter.c --- linux-2.6.19.2.orig/arch/i386/kernel/sysenter.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/sysenter.c 2007-01-12 23:27:21.000000000 +0000 @@ -45,7 +45,7 @@ void enable_sep_cpu(void) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; if (!boot_cpu_has(X86_FEATURE_SEP)) { put_cpu(); @@ -125,16 +125,36 @@ unsigned long addr; int ret; +#ifdef CONFIG_PAX_SEGMEXEC + struct vm_area_struct *vma_m = NULL; +#endif + + vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + if (!vma) + return -ENOMEM; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) { + vma_m = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + if (!vma_m) { + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } + } +#endif + down_write(&mm->mmap_sem); - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 
MAP_EXECUTABLE); if (IS_ERR_VALUE(addr)) { ret = addr; - goto up_fail; - } - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) { - ret = -ENOMEM; + kmem_cache_free(vm_area_cachep, vma); + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma_m) + kmem_cache_free(vm_area_cachep, vma_m); +#endif + goto up_fail; } @@ -142,18 +162,49 @@ vma->vm_end = addr + PAGE_SIZE; /* MAYWRITE to allow gdb to COW and set breakpoints */ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + +#ifdef CONFIG_PAX_MPROTECT + if (mm->pax_flags & MF_PAX_MPROTECT) + vma->vm_flags &= ~VM_MAYWRITE; +#endif + vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + vma->vm_page_prot = protection_map[vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; vma->vm_ops = &syscall_vm_ops; vma->vm_mm = mm; ret = insert_vm_struct(mm, vma); if (unlikely(ret)) { kmem_cache_free(vm_area_cachep, vma); + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma_m) + kmem_cache_free(vm_area_cachep, vma_m); +#endif + goto up_fail; } - current->mm->context.vdso = (void *)addr; +#ifdef CONFIG_PAX_SEGMEXEC + if (vma_m) { + *vma_m = *vma; + vma_m->vm_start += SEGMEXEC_TASK_SIZE; + vma_m->vm_end += SEGMEXEC_TASK_SIZE; + ret = insert_vm_struct(mm, vma_m); + if (unlikely(ret)) { + kmem_cache_free(vm_area_cachep, vma_m); + goto up_fail; + } + vma_m->vm_flags |= VM_MIRROR; + vma->vm_flags |= VM_MIRROR; + vma_m->vm_mirror = vma->vm_start - vma_m->vm_start; + vma->vm_mirror = vma_m->vm_start - vma->vm_start; + vma_m->vm_pgoff = vma->vm_pgoff; + mm->total_vm++; + } +#endif + + current->mm->context.vdso = addr; current_thread_info()->sysenter_return = (void *)VDSO_SYM(&SYSENTER_RETURN); mm->total_vm++; @@ -164,8 +215,17 @@ const char *arch_vma_name(struct vm_area_struct *vma) { - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + if (vma->vm_start == vma->vm_mm->context.vdso) return "[vdso]"; + +#ifdef CONFIG_PAX_SEGMEXEC + if (!(vma->vm_mm->pax_flags & MF_PAX_SEGMEXEC) || 
!(vma->vm_flags & VM_MIRROR)) + return NULL; + + if (vma->vm_start + vma->vm_mirror == vma->vm_mm->context.vdso) + return "[vdso]"; +#endif + return NULL; } diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/time.c linux-2.6.19.2/arch/i386/kernel/time.c --- linux-2.6.19.2.orig/arch/i386/kernel/time.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/time.c 2007-01-12 23:27:21.000000000 +0000 @@ -133,7 +133,7 @@ unsigned long pc = instruction_pointer(regs); #ifdef CONFIG_SMP - if (!user_mode_vm(regs) && in_lock_functions(pc)) { + if (!user_mode(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->ebp + 4); #else @@ -345,7 +345,7 @@ .cls = &timer_sysclass, }; -static int time_init_device(void) +static int __init time_init_device(void) { int error = sysdev_class_register(&timer_sysclass); if (!error) diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/traps.c linux-2.6.19.2/arch/i386/kernel/traps.c --- linux-2.6.19.2.orig/arch/i386/kernel/traps.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/traps.c 2007-01-12 23:27:21.000000000 +0000 @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -61,18 +62,10 @@ asmlinkage int system_call(void); -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 } }; - /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. 
- */ -struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +extern struct desc_struct idt_table[256]; asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -146,7 +139,7 @@ #else while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; - if (__kernel_text_address(addr)) + if (__kernel_text_address(addr + __KERNEL_TEXT_OFFSET)) ops->address(data, addr); } #endif @@ -351,7 +344,7 @@ esp = (unsigned long) (&regs->esp); savesegment(ss, ss); - if (user_mode_vm(regs)) { + if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -382,13 +375,15 @@ u8 __user *eip; int code_bytes = 64; unsigned char c; + mm_segment_t old_fs = get_fs(); printk("\n" KERN_EMERG "Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); printk(KERN_EMERG "Code: "); - eip = (u8 __user *)regs->eip - 43; + set_fs(KERNEL_DS); + eip = (u8 __user *)regs->eip - 43 + __KERNEL_TEXT_OFFSET; if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { /* try starting at EIP */ eip = (u8 __user *)regs->eip; @@ -399,26 +394,29 @@ printk(" Bad EIP value."); break; } - if (eip == (u8 __user *)regs->eip) + if (eip == (u8 __user *)regs->eip + __KERNEL_TEXT_OFFSET) printk("<%02x> ", c); else printk("%02x ", c); } + set_fs(old_fs); } printk("\n"); } static void handle_BUG(struct pt_regs *regs) { - unsigned long eip = regs->eip; + unsigned long eip = regs->eip + __KERNEL_TEXT_OFFSET; unsigned short ud2; + mm_segment_t old_fs = get_fs(); + set_fs(KERNEL_DS); if (eip < PAGE_OFFSET) - return; + goto out; if (probe_kernel_address((unsigned short __user *)eip, ud2)) - return; + goto out; if (ud2 != 0x0b0f) - return; + goto out; printk(KERN_EMERG "------------[ cut here ]------------\n"); @@ -428,18 +426,21 @@ char *file; char c; - if (probe_kernel_address((unsigned short __user *)(eip + 2), - line)) + if (probe_kernel_address((unsigned short __user *)(eip + 7), line)) break; - if (__get_user(file, (char * __user *)(eip + 4)) ||
- (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) + if (probe_kernel_address((char * __user *)(eip + 3), file) || + file < _text + __KERNEL_TEXT_OFFSET) + break; + if (probe_kernel_address(file, c)) file = "<bad filename>"; - printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); - return; + goto out; } while (0); #endif printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n"); + +out: + set_fs(old_fs); } /* This is gone through when something in the kernel @@ -538,7 +539,7 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { - if (!user_mode_vm(regs)) + if (!user_mode(regs)) die(str, regs, err); } @@ -556,7 +557,7 @@ goto trap_signal; } - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto kernel_trap; trap_signal: { @@ -644,7 +645,7 @@ long error_code) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = &init_tss[cpu]; struct thread_struct *thread = &current->thread; /* @@ -680,9 +681,25 @@ if (regs->eflags & VM_MASK) goto gp_in_vm86; - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto gp_in_kernel; +#ifdef CONFIG_PAX_PAGEEXEC + if (current->mm && (current->mm->pax_flags & MF_PAX_PAGEEXEC)) { + struct mm_struct *mm = current->mm; + unsigned long limit; + + down_write(&mm->mmap_sem); + limit = mm->context.user_cs_limit; + if (limit < TASK_SIZE) { + track_exec_limit(mm, limit, TASK_SIZE, PROT_EXEC); + up_write(&mm->mmap_sem); + return; + } + up_write(&mm->mmap_sem); + } +#endif + current->thread.error_code = error_code; current->thread.trap_no = 13; force_sig(SIGSEGV, current); @@ -698,6 +715,13 @@ if (notify_die(DIE_GPF, "general protection fault", regs, error_code, 13, SIGSEGV) == NOTIFY_STOP) return; + +#ifdef CONFIG_PAX_KERNEXEC + if ((regs->xcs & 0xFFFF) == __KERNEL_CS) + die("PAX: suspicious general protection fault", regs, error_code); + else +#endif + die("general protection fault", regs, error_code); } } @@ -781,7 +805,7 @@ /* If we are in kernel we are
probably nested up pretty bad * and might aswell get out now while we still can. */ - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { current->thread.trap_no = 2; crash_kexec(regs); } @@ -913,7 +937,7 @@ * check for kernel mode by just checking the CPL * of CS. */ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto clear_TF_reenable; } @@ -1189,7 +1213,19 @@ */ void set_intr_gate(unsigned int n, void *addr) { + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + _set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } /* diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/vm86.c linux-2.6.19.2/arch/i386/kernel/vm86.c --- linux-2.6.19.2.orig/arch/i386/kernel/vm86.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/vm86.c 2007-01-12 23:27:21.000000000 +0000 @@ -122,7 +122,7 @@ do_exit(SIGSEGV); } - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); current->thread.esp0 = current->thread.saved_esp0; current->thread.sysenter_cs = __KERNEL_CS; load_esp0(tss, &current->thread); @@ -296,7 +296,7 @@ savesegment(fs, tsk->thread.saved_fs); savesegment(gs, tsk->thread.saved_gs); - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/vmlinux.lds.S linux-2.6.19.2/arch/i386/kernel/vmlinux.lds.S --- linux-2.6.19.2.orig/arch/i386/kernel/vmlinux.lds.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/vmlinux.lds.S 2007-01-12 23:27:21.000000000 +0000 @@ -8,6 +8,13 @@ #include #include #include +#include + +#ifdef CONFIG_X86_PAE +#define PMD_SHIFT 21 +#else +#define PMD_SHIFT 22 +#endif OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -16,31 +23,133 @@ PHDRS { text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD
FLAGS(6); /* RW_ */ note PT_NOTE FLAGS(4); /* R__ */ } SECTIONS { . = __KERNEL_START; - phys_startup_32 = startup_32 - LOAD_OFFSET; + phys_startup_32 = startup_32 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; + + .text.startup : AT(ADDR(.text.startup) - LOAD_OFFSET) { + BYTE(0xEA) /* jmp far */ + LONG(phys_startup_32) + SHORT(__BOOT_CS) + } :text = 0x9090 + + /* might get freed after init */ + . = ALIGN(4096); + __smp_alt_begin = .; + __smp_alt_instructions = .; + .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { + *(.smp_altinstructions) + } + __smp_alt_instructions_end = .; + . = ALIGN(4); + __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + *(.smp_locks) + } + __smp_locks_end = .; + .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { + *(.smp_altinstr_replacement) + } + . = ALIGN(4096); + __smp_alt_end = .; + + /* will be freed after init */ + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } + . = ALIGN(16); + __setup_start = .; + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } + __setup_end = .; + __initcall_start = .; + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + INITCALLS + } + __initcall_end = .; + __con_initcall_start = .; + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + *(.con_initcall.init) + } + __con_initcall_end = .; + SECURITY_INIT + . = ALIGN(4); + __alt_instructions = .; + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + *(.altinstructions) + } + __alt_instructions_end = .; + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } + + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } + . = ALIGN(4096); + __initramfs_start = .; + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } + __initramfs_end = .; + . 
= ALIGN(L1_CACHE_BYTES); + __per_cpu_start = .; + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } + __per_cpu_end = .; + /* read-only */ + + . = ALIGN(4096); + .init.text (. - __KERNEL_TEXT_OFFSET) : AT(ADDR(.init.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + /* .exit.text is discard at runtime, not link time, to deal with references + from .altinstructions and .eh_frame */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { *(.exit.text) } + +#ifdef CONFIG_PAX_KERNEXEC + .text.align : AT(ADDR(.text.align) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + . = ALIGN(__KERNEL_TEXT_OFFSET - LOAD_OFFSET) - 1; + BYTE(0) + } +#else + . = ALIGN(4096); +#endif + + __init_end = . + __KERNEL_TEXT_OFFSET; + /* freed after init ends here */ + _text = .; /* Text and read-only data */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { + .text : AT(ADDR(.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { *(.text) SCHED_TEXT LOCK_TEXT KPROBES_TEXT *(.fixup) *(.gnu.warning) - } :text = 0x9090 + } _etext = .; /* End of text section */ - + . += __KERNEL_TEXT_OFFSET; . = ALIGN(16); /* Exception table */ __start___ex_table = .; __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } __stop___ex_table = .; + . = ALIGN(4096); + .rodata.page_aligned : AT(ADDR(.rodata.page_aligned) - LOAD_OFFSET) { + *(.empty_zero_page) + +#ifdef CONFIG_X86_PAE + *(.swapper_pm_dir) +#endif + + *(.swapper_pg_dir) + *(.idt) + } + RODATA . = ALIGN(4); @@ -50,9 +159,25 @@ } __tracedata_end = .; +#ifdef CONFIG_PAX_KERNEXEC + . = ALIGN(4096); + MODULES_VADDR = .; + + .module.text : AT(ADDR(.module.text) - LOAD_OFFSET) { + . += (4 * 1024 * 1024); + . = ALIGN(1 << PMD_SHIFT) - 1; + BYTE(0) + } + + MODULES_END = .; +#else + . = ALIGN(32); +#endif + /* writeable */ . = ALIGN(4096); .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + _data = .; *(.data) CONSTRUCTORS } :data @@ -63,11 +188,6 @@ . 
= ALIGN(4096); __nosave_end = .; - . = ALIGN(4096); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.idt) - } - . = ALIGN(32); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) @@ -76,7 +196,6 @@ /* rarely changed data like cpu maps */ . = ALIGN(32); .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } - _edata = .; /* End of data section */ #ifdef CONFIG_STACK_UNWIND . = ALIGN(4); @@ -92,75 +211,9 @@ *(.data.init_task) } - /* might get freed after init */ - . = ALIGN(4096); - __smp_alt_begin = .; - __smp_alt_instructions = .; - .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { - *(.smp_altinstructions) - } - __smp_alt_instructions_end = .; - . = ALIGN(4); - __smp_locks = .; - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - *(.smp_locks) - } - __smp_locks_end = .; - .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { - *(.smp_altinstr_replacement) - } - . = ALIGN(4096); - __smp_alt_end = .; + _edata = .; /* End of data section */ - /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - *(.init.text) - _einittext = .; - } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } - . = ALIGN(16); - __setup_start = .; - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } - __setup_end = .; - __initcall_start = .; - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - INITCALLS - } - __initcall_end = .; - __con_initcall_start = .; - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - *(.con_initcall.init) - } - __con_initcall_end = .; - SECURITY_INIT - . 
= ALIGN(4); - __alt_instructions = .; - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - *(.altinstructions) - } - __alt_instructions_end = .; - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } - . = ALIGN(4096); - __initramfs_start = .; - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } - __initramfs_end = .; - . = ALIGN(L1_CACHE_BYTES); - __per_cpu_start = .; - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } - __per_cpu_end = .; . = ALIGN(4096); - __init_end = .; - /* freed after init ends here */ - __bss_start = .; /* BSS */ .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) { *(.bss.page_aligned) diff -Naur linux-2.6.19.2.orig/arch/i386/lib/checksum.S linux-2.6.19.2/arch/i386/lib/checksum.S --- linux-2.6.19.2.orig/arch/i386/lib/checksum.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/checksum.S 2007-01-12 23:27:21.000000000 +0000 @@ -26,7 +26,8 @@ */ #include - +#include + /* * computes a partial checksum, e.g. 
for TCP/UDP fragments */ @@ -280,12 +281,23 @@ .align 4 .globl csum_partial_copy_generic - +.globl csum_partial_copy_generic_to_user +.globl csum_partial_copy_generic_from_user + #ifndef CONFIG_X86_USE_PPRO_CHECKSUM #define ARGBASE 16 #define FP 12 - + +csum_partial_copy_generic_to_user: + pushl $(__USER_DS) + popl %es + jmp csum_partial_copy_generic + +csum_partial_copy_generic_from_user: + pushl $(__USER_DS) + popl %ds + csum_partial_copy_generic: subl $4,%esp pushl %edi @@ -304,7 +316,7 @@ jmp 4f SRC(1: movw (%esi), %bx ) addl $2, %esi -DST( movw %bx, (%edi) ) +DST( movw %bx, %es:(%edi) ) addl $2, %edi addw %bx, %ax adcl $0, %eax @@ -316,30 +328,30 @@ SRC(1: movl (%esi), %ebx ) SRC( movl 4(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) adcl %edx, %eax -DST( movl %edx, 4(%edi) ) +DST( movl %edx, %es:4(%edi) ) SRC( movl 8(%esi), %ebx ) SRC( movl 12(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 8(%edi) ) +DST( movl %ebx, %es:8(%edi) ) adcl %edx, %eax -DST( movl %edx, 12(%edi) ) +DST( movl %edx, %es:12(%edi) ) SRC( movl 16(%esi), %ebx ) SRC( movl 20(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 16(%edi) ) +DST( movl %ebx, %es:16(%edi) ) adcl %edx, %eax -DST( movl %edx, 20(%edi) ) +DST( movl %edx, %es:20(%edi) ) SRC( movl 24(%esi), %ebx ) SRC( movl 28(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 24(%edi) ) +DST( movl %ebx, %es:24(%edi) ) adcl %edx, %eax -DST( movl %edx, 28(%edi) ) +DST( movl %edx, %es:28(%edi) ) lea 32(%esi), %esi lea 32(%edi), %edi @@ -353,7 +365,7 @@ shrl $2, %edx # This clears CF SRC(3: movl (%esi), %ebx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) lea 4(%esi), %esi lea 4(%edi), %edi dec %edx @@ -365,12 +377,12 @@ jb 5f SRC( movw (%esi), %cx ) leal 2(%esi), %esi -DST( movw %cx, (%edi) ) +DST( movw %cx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%ecx SRC(5: movb (%esi), %cl ) -DST( movb %cl, (%edi) ) +DST( movb %cl, %es:(%edi) ) 6: addl %ecx, %eax adcl $0, %eax 7: @@ -381,7 
+393,7 @@ 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination - computing the rest # is too much work @@ -394,11 +406,15 @@ 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT,(%ebx) + movl $-EFAULT,%ss:(%ebx) jmp 5000b .previous + pushl %ss + popl %ds + pushl %ss + popl %es popl %ebx popl %esi popl %edi @@ -410,17 +426,28 @@ /* Version for PentiumII/PPro */ #define ROUND1(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ addl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)); #define ROUND(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ adcl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)); #define ARGBASE 12 - + +csum_partial_copy_generic_to_user: + pushl $(__USER_DS) + popl %es + jmp csum_partial_copy_generic + +csum_partial_copy_generic_from_user: + pushl $(__USER_DS) + popl %ds + csum_partial_copy_generic: pushl %ebx pushl %edi @@ -439,7 +466,7 @@ subl %ebx, %edi lea -1(%esi),%edx andl $-32,%edx - lea 3f(%ebx,%ebx), %ebx + lea 3f(%ebx,%ebx,2), %ebx testl %esi, %esi jmp *%ebx 1: addl $64,%esi @@ -460,19 +487,19 @@ jb 5f SRC( movw (%esi), %dx ) leal 2(%esi), %esi -DST( movw %dx, (%edi) ) +DST( movw %dx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%edx 5: SRC( movb (%esi), %dl ) -DST( movb %dl, (%edi) ) +DST( movb %dl, %es:(%edi) ) 6: addl %edx, %eax adcl $0, %eax 7: .section .fixup, "ax" 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination (computing the rest is too much work) movl ARGBASE+8(%esp),%edi # dst movl ARGBASE+12(%esp),%ecx # len @@ -480,10 +507,14 @@ rep; stosb jmp 7b 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) jmp 7b .previous + pushl %ss + popl %ds + pushl %ss + popl %es popl %esi popl %edi popl %ebx diff -Naur linux-2.6.19.2.orig/arch/i386/lib/getuser.S 
linux-2.6.19.2/arch/i386/lib/getuser.S --- linux-2.6.19.2.orig/arch/i386/lib/getuser.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/getuser.S 2007-01-12 23:27:21.000000000 +0000 @@ -9,6 +9,7 @@ * return value. */ #include +#include /* @@ -30,8 +31,12 @@ GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 1: movzbl (%eax),%edx xorl %eax,%eax + pushl %ss + pop %ds ret .align 4 @@ -42,7 +47,11 @@ GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 2: movzwl -1(%eax),%edx + pushl %ss + pop %ds xorl %eax,%eax ret @@ -54,11 +63,17 @@ GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 3: movl -3(%eax),%edx + pushl %ss + pop %ds xorl %eax,%eax ret bad_get_user: + pushl %ss + pop %ds xorl %edx,%edx movl $-14,%eax ret diff -Naur linux-2.6.19.2.orig/arch/i386/lib/mmx.c linux-2.6.19.2/arch/i386/lib/mmx.c --- linux-2.6.19.2.orig/arch/i386/lib/mmx.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/mmx.c 2007-01-12 23:27:21.000000000 +0000 @@ -47,14 +47,30 @@ " prefetch 256(%0)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : : "r" (from) : "ax"); for(; i>5; i--) @@ -78,14 +94,30 @@ " movq %%mm2, 48(%1)\n" " movq %%mm3, 56(%1)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + 
" movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } @@ -178,14 +210,30 @@ " prefetch 256(%0)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : : "r" (from) : "ax"); for(i=0; i<(4096-320)/64; i++) { @@ -208,14 +256,30 @@ " movq 56(%0), %%mm7\n" " movntq %%mm7, 56(%1)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } @@ -308,14 +372,30 @@ " prefetch 256(%0)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, 
%%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : : "r" (from) : "ax"); for(i=0; i<4096/64; i++) { @@ -338,14 +418,30 @@ " movq %%mm2, 48(%1)\n" " movq %%mm3, 56(%1)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } diff -Naur linux-2.6.19.2.orig/arch/i386/lib/putuser.S linux-2.6.19.2/arch/i386/lib/putuser.S --- linux-2.6.19.2.orig/arch/i386/lib/putuser.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/putuser.S 2007-01-12 23:27:21.000000000 +0000 @@ -9,6 +9,7 @@ * return value. 
*/ #include +#include /* @@ -33,7 +34,11 @@ ENTER cmpl TI_addr_limit(%ebx),%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 1: movb %al,(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT @@ -45,7 +50,11 @@ subl $1,%ebx cmpl %ebx,%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 2: movw %ax,(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT @@ -57,7 +66,11 @@ subl $3,%ebx cmpl %ebx,%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 3: movl %eax,(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT @@ -69,12 +82,18 @@ subl $7,%ebx cmpl %ebx,%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 4: movl %eax,(%ecx) 5: movl %edx,4(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT bad_put_user: + pushl %ss + popl %ds movl $-14,%eax EXIT diff -Naur linux-2.6.19.2.orig/arch/i386/lib/usercopy.c linux-2.6.19.2/arch/i386/lib/usercopy.c --- linux-2.6.19.2.orig/arch/i386/lib/usercopy.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/usercopy.c 2007-01-12 23:27:21.000000000 +0000 @@ -28,34 +28,41 @@ * Copy a null terminated string from userspace. 
*/ -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - int __d0, __d1, __d2; \ - might_sleep(); \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - ".previous" \ - : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) +static long __do_strncpy_from_user(char *dst, const char __user *src, long count) +{ + int __d0, __d1, __d2; + long res = -EFAULT; + + might_sleep(); + __asm__ __volatile__( + " movw %w10,%%ds\n" + " testl %1,%1\n" + " jz 2f\n" + "0: lodsb\n" + " stosb\n" + " testb %%al,%%al\n" + " jz 1f\n" + " decl %1\n" + " jnz 0b\n" + "1: subl %1,%0\n" + "2:\n" + " pushl %%ss\n" + " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "3: movl %5,%0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,3b\n" + ".previous" + : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), + "=&D" (__d2) + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst), + "r"(__USER_DS) + : "memory"); + return res; +} /** * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. 
@@ -80,9 +87,7 @@ long __strncpy_from_user(char *dst, const char __user *src, long count) { - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; + return __do_strncpy_from_user(dst, src, count); } EXPORT_SYMBOL(__strncpy_from_user); @@ -109,7 +114,7 @@ { long res = -EFAULT; if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); + res = __do_strncpy_from_user(dst, src, count); return res; } EXPORT_SYMBOL(strncpy_from_user); @@ -118,27 +123,33 @@ * Zero Userspace */ -#define __do_clear_user(addr,size) \ -do { \ - int __d0; \ - might_sleep(); \ - __asm__ __volatile__( \ - "0: rep; stosl\n" \ - " movl %2,%0\n" \ - "1: rep; stosb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%2,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0) \ - : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ -} while (0) +static unsigned long __do_clear_user(void __user *addr, unsigned long size) +{ + int __d0; + + might_sleep(); + __asm__ __volatile__( + " movw %w6,%%es\n" + "0: rep; stosl\n" + " movl %2,%0\n" + "1: rep; stosb\n" + "2:\n" + " pushl %%ss\n" + " popl %%es\n" + ".section .fixup,\"ax\"\n" + "3: lea 0(%2,%0,4),%0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,3b\n" + " .long 1b,2b\n" + ".previous" + : "=&c"(size), "=&D" (__d0) + : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0), + "r"(__USER_DS)); + return size; +} /** * clear_user: - Zero a block of memory in user space. 
@@ -155,7 +166,7 @@ { might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) - __do_clear_user(to, n); + n = __do_clear_user(to, n); return n; } EXPORT_SYMBOL(clear_user); @@ -174,8 +185,7 @@ unsigned long __clear_user(void __user *to, unsigned long n) { - __do_clear_user(to, n); - return n; + return __do_clear_user(to, n); } EXPORT_SYMBOL(__clear_user); @@ -198,14 +208,17 @@ might_sleep(); __asm__ __volatile__( + " movw %w8,%%es\n" " testl %0, %0\n" " jz 3f\n" - " andl %0,%%ecx\n" + " movl %0,%%ecx\n" "0: repne; scasb\n" " setne %%al\n" " subl %%ecx,%0\n" " addl %0,%%eax\n" "1:\n" + " pushl %%ss\n" + " popl %%es\n" ".section .fixup,\"ax\"\n" "2: xorl %%eax,%%eax\n" " jmp 1b\n" @@ -217,7 +230,7 @@ " .long 0b,2b\n" ".previous" :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) - :"0" (n), "1" (s), "2" (0), "3" (mask) + :"0" (n), "1" (s), "2" (0), "3" (mask), "r" (__USER_DS) :"cc"); return res & mask; } @@ -225,10 +238,121 @@ #ifdef CONFIG_X86_INTEL_USERCOPY static unsigned long -__copy_user_intel(void __user *to, const void *from, unsigned long size) +__generic_copy_to_user_intel(void __user *to, const void *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " movw %w6, %%es\n" + " .align 2,0x90\n" + "1: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 3f\n" + "2: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "3: movl 0(%4), %%eax\n" + "4: movl 4(%4), %%edx\n" + "5: movl %%eax, %%es:0(%3)\n" + "6: movl %%edx, %%es:4(%3)\n" + "7: movl 8(%4), %%eax\n" + "8: movl 12(%4),%%edx\n" + "9: movl %%eax, %%es:8(%3)\n" + "10: movl %%edx, %%es:12(%3)\n" + "11: movl 16(%4), %%eax\n" + "12: movl 20(%4), %%edx\n" + "13: movl %%eax, %%es:16(%3)\n" + "14: movl %%edx, %%es:20(%3)\n" + "15: movl 24(%4), %%eax\n" + "16: movl 28(%4), %%edx\n" + "17: movl %%eax, %%es:24(%3)\n" + "18: movl %%edx, %%es:28(%3)\n" + "19: movl 32(%4), %%eax\n" + "20: movl 36(%4), %%edx\n" + "21: movl %%eax, %%es:32(%3)\n" + "22: movl %%edx, %%es:36(%3)\n" + "23: movl 40(%4), %%eax\n" + "24: movl 44(%4), 
%%edx\n" + "25: movl %%eax, %%es:40(%3)\n" + "26: movl %%edx, %%es:44(%3)\n" + "27: movl 48(%4), %%eax\n" + "28: movl 52(%4), %%edx\n" + "29: movl %%eax, %%es:48(%3)\n" + "30: movl %%edx, %%es:52(%3)\n" + "31: movl 56(%4), %%eax\n" + "32: movl 60(%4), %%edx\n" + "33: movl %%eax, %%es:56(%3)\n" + "34: movl %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 1b\n" + "35: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "99: rep; movsl\n" + "36: movl %%eax, %0\n" + "37: rep; movsb\n" + "100:\n" + " pushl %%ss\n" + " popl %%es\n" + ".section .fixup,\"ax\"\n" + "101: lea 0(%%eax,%0,4),%0\n" + " jmp 100b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b,100b\n" + " .long 2b,100b\n" + " .long 3b,100b\n" + " .long 4b,100b\n" + " .long 5b,100b\n" + " .long 6b,100b\n" + " .long 7b,100b\n" + " .long 8b,100b\n" + " .long 9b,100b\n" + " .long 10b,100b\n" + " .long 11b,100b\n" + " .long 12b,100b\n" + " .long 13b,100b\n" + " .long 14b,100b\n" + " .long 15b,100b\n" + " .long 16b,100b\n" + " .long 17b,100b\n" + " .long 18b,100b\n" + " .long 19b,100b\n" + " .long 20b,100b\n" + " .long 21b,100b\n" + " .long 22b,100b\n" + " .long 23b,100b\n" + " .long 24b,100b\n" + " .long 25b,100b\n" + " .long 26b,100b\n" + " .long 27b,100b\n" + " .long 28b,100b\n" + " .long 29b,100b\n" + " .long 30b,100b\n" + " .long 31b,100b\n" + " .long 32b,100b\n" + " .long 33b,100b\n" + " .long 34b,100b\n" + " .long 35b,100b\n" + " .long 36b,100b\n" + " .long 37b,100b\n" + " .long 99b,101b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; +} + +static unsigned long +__generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size) { int d0, d1; __asm__ __volatile__( + " movw %w6, %%ds\n" " .align 2,0x90\n" "1: movl 32(%4), %%eax\n" " cmpl $67, %0\n" @@ -237,36 +361,36 @@ " .align 
2,0x90\n" "3: movl 0(%4), %%eax\n" "4: movl 4(%4), %%edx\n" - "5: movl %%eax, 0(%3)\n" - "6: movl %%edx, 4(%3)\n" + "5: movl %%eax, %%es:0(%3)\n" + "6: movl %%edx, %%es:4(%3)\n" "7: movl 8(%4), %%eax\n" "8: movl 12(%4),%%edx\n" - "9: movl %%eax, 8(%3)\n" - "10: movl %%edx, 12(%3)\n" + "9: movl %%eax, %%es:8(%3)\n" + "10: movl %%edx, %%es:12(%3)\n" "11: movl 16(%4), %%eax\n" "12: movl 20(%4), %%edx\n" - "13: movl %%eax, 16(%3)\n" - "14: movl %%edx, 20(%3)\n" + "13: movl %%eax, %%es:16(%3)\n" + "14: movl %%edx, %%es:20(%3)\n" "15: movl 24(%4), %%eax\n" "16: movl 28(%4), %%edx\n" - "17: movl %%eax, 24(%3)\n" - "18: movl %%edx, 28(%3)\n" + "17: movl %%eax, %%es:24(%3)\n" + "18: movl %%edx, %%es:28(%3)\n" "19: movl 32(%4), %%eax\n" "20: movl 36(%4), %%edx\n" - "21: movl %%eax, 32(%3)\n" - "22: movl %%edx, 36(%3)\n" + "21: movl %%eax, %%es:32(%3)\n" + "22: movl %%edx, %%es:36(%3)\n" "23: movl 40(%4), %%eax\n" "24: movl 44(%4), %%edx\n" - "25: movl %%eax, 40(%3)\n" - "26: movl %%edx, 44(%3)\n" + "25: movl %%eax, %%es:40(%3)\n" + "26: movl %%edx, %%es:44(%3)\n" "27: movl 48(%4), %%eax\n" "28: movl 52(%4), %%edx\n" - "29: movl %%eax, 48(%3)\n" - "30: movl %%edx, 52(%3)\n" + "29: movl %%eax, %%es:48(%3)\n" + "30: movl %%edx, %%es:52(%3)\n" "31: movl 56(%4), %%eax\n" "32: movl 60(%4), %%edx\n" - "33: movl %%eax, 56(%3)\n" - "34: movl %%edx, 60(%3)\n" + "33: movl %%eax, %%es:56(%3)\n" + "34: movl %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -280,6 +404,8 @@ "36: movl %%eax, %0\n" "37: rep; movsb\n" "100:\n" + " pushl %%ss\n" + " popl %%ds\n" ".section .fixup,\"ax\"\n" "101: lea 0(%%eax,%0,4),%0\n" " jmp 100b\n" @@ -326,7 +452,7 @@ " .long 99b,101b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -336,6 +462,7 @@ { int d0, d1; __asm__ __volatile__( + " movw %w6, %%ds\n" " .align 2,0x90\n" "0: movl 32(%4), 
%%eax\n" " cmpl $67, %0\n" @@ -344,36 +471,36 @@ " .align 2,0x90\n" "2: movl 0(%4), %%eax\n" "21: movl 4(%4), %%edx\n" - " movl %%eax, 0(%3)\n" - " movl %%edx, 4(%3)\n" + " movl %%eax, %%es:0(%3)\n" + " movl %%edx, %%es:4(%3)\n" "3: movl 8(%4), %%eax\n" "31: movl 12(%4),%%edx\n" - " movl %%eax, 8(%3)\n" - " movl %%edx, 12(%3)\n" + " movl %%eax, %%es:8(%3)\n" + " movl %%edx, %%es:12(%3)\n" "4: movl 16(%4), %%eax\n" "41: movl 20(%4), %%edx\n" - " movl %%eax, 16(%3)\n" - " movl %%edx, 20(%3)\n" + " movl %%eax, %%es:16(%3)\n" + " movl %%edx, %%es:20(%3)\n" "10: movl 24(%4), %%eax\n" "51: movl 28(%4), %%edx\n" - " movl %%eax, 24(%3)\n" - " movl %%edx, 28(%3)\n" + " movl %%eax, %%es:24(%3)\n" + " movl %%edx, %%es:28(%3)\n" "11: movl 32(%4), %%eax\n" "61: movl 36(%4), %%edx\n" - " movl %%eax, 32(%3)\n" - " movl %%edx, 36(%3)\n" + " movl %%eax, %%es:32(%3)\n" + " movl %%edx, %%es:36(%3)\n" "12: movl 40(%4), %%eax\n" "71: movl 44(%4), %%edx\n" - " movl %%eax, 40(%3)\n" - " movl %%edx, 44(%3)\n" + " movl %%eax, %%es:40(%3)\n" + " movl %%edx, %%es:44(%3)\n" "13: movl 48(%4), %%eax\n" "81: movl 52(%4), %%edx\n" - " movl %%eax, 48(%3)\n" - " movl %%edx, 52(%3)\n" + " movl %%eax, %%es:48(%3)\n" + " movl %%edx, %%es:52(%3)\n" "14: movl 56(%4), %%eax\n" "91: movl 60(%4), %%edx\n" - " movl %%eax, 56(%3)\n" - " movl %%edx, 60(%3)\n" + " movl %%eax, %%es:56(%3)\n" + " movl %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -387,6 +514,8 @@ " movl %%eax,%0\n" "7: rep; movsb\n" "8:\n" + " pushl %%ss\n" + " popl %%ds\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" "16: pushl %0\n" @@ -421,7 +550,7 @@ " .long 7b,16b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -437,6 +566,7 @@ int d0, d1; __asm__ __volatile__( + " movw %w6, %%ds\n" " .align 2,0x90\n" "0: movl 32(%4), %%eax\n" " cmpl $67, %0\n" @@ -445,36 +575,36 @@ 
" .align 2,0x90\n" "2: movl 0(%4), %%eax\n" "21: movl 4(%4), %%edx\n" - " movnti %%eax, 0(%3)\n" - " movnti %%edx, 4(%3)\n" + " movnti %%eax, %%es:0(%3)\n" + " movnti %%edx, %%es:4(%3)\n" "3: movl 8(%4), %%eax\n" "31: movl 12(%4),%%edx\n" - " movnti %%eax, 8(%3)\n" - " movnti %%edx, 12(%3)\n" + " movnti %%eax, %%es:8(%3)\n" + " movnti %%edx, %%es:12(%3)\n" "4: movl 16(%4), %%eax\n" "41: movl 20(%4), %%edx\n" - " movnti %%eax, 16(%3)\n" - " movnti %%edx, 20(%3)\n" + " movnti %%eax, %%es:16(%3)\n" + " movnti %%edx, %%es:20(%3)\n" "10: movl 24(%4), %%eax\n" "51: movl 28(%4), %%edx\n" - " movnti %%eax, 24(%3)\n" - " movnti %%edx, 28(%3)\n" + " movnti %%eax, %%es:24(%3)\n" + " movnti %%edx, %%es:28(%3)\n" "11: movl 32(%4), %%eax\n" "61: movl 36(%4), %%edx\n" - " movnti %%eax, 32(%3)\n" - " movnti %%edx, 36(%3)\n" + " movnti %%eax, %%es:32(%3)\n" + " movnti %%edx, %%es:36(%3)\n" "12: movl 40(%4), %%eax\n" "71: movl 44(%4), %%edx\n" - " movnti %%eax, 40(%3)\n" - " movnti %%edx, 44(%3)\n" + " movnti %%eax, %%es:40(%3)\n" + " movnti %%edx, %%es:44(%3)\n" "13: movl 48(%4), %%eax\n" "81: movl 52(%4), %%edx\n" - " movnti %%eax, 48(%3)\n" - " movnti %%edx, 52(%3)\n" + " movnti %%eax, %%es:48(%3)\n" + " movnti %%edx, %%es:52(%3)\n" "14: movl 56(%4), %%eax\n" "91: movl 60(%4), %%edx\n" - " movnti %%eax, 56(%3)\n" - " movnti %%edx, 60(%3)\n" + " movnti %%eax, %%es:56(%3)\n" + " movnti %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -489,6 +619,8 @@ " movl %%eax,%0\n" "7: rep; movsb\n" "8:\n" + " pushl %%ss\n" + " popl %%ds\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" "16: pushl %0\n" @@ -523,7 +655,7 @@ " .long 7b,16b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -534,6 +666,7 @@ int d0, d1; __asm__ __volatile__( + " movw %w6, %%ds\n" " .align 2,0x90\n" "0: movl 32(%4), %%eax\n" " cmpl $67, %0\n" @@ 
-542,36 +675,36 @@ " .align 2,0x90\n" "2: movl 0(%4), %%eax\n" "21: movl 4(%4), %%edx\n" - " movnti %%eax, 0(%3)\n" - " movnti %%edx, 4(%3)\n" + " movnti %%eax, %%es:0(%3)\n" + " movnti %%edx, %%es:4(%3)\n" "3: movl 8(%4), %%eax\n" "31: movl 12(%4),%%edx\n" - " movnti %%eax, 8(%3)\n" - " movnti %%edx, 12(%3)\n" + " movnti %%eax, %%es:8(%3)\n" + " movnti %%edx, %%es:12(%3)\n" "4: movl 16(%4), %%eax\n" "41: movl 20(%4), %%edx\n" - " movnti %%eax, 16(%3)\n" - " movnti %%edx, 20(%3)\n" + " movnti %%eax, %%es:16(%3)\n" + " movnti %%edx, %%es:20(%3)\n" "10: movl 24(%4), %%eax\n" "51: movl 28(%4), %%edx\n" - " movnti %%eax, 24(%3)\n" - " movnti %%edx, 28(%3)\n" + " movnti %%eax, %%es:24(%3)\n" + " movnti %%edx, %%es:28(%3)\n" "11: movl 32(%4), %%eax\n" "61: movl 36(%4), %%edx\n" - " movnti %%eax, 32(%3)\n" - " movnti %%edx, 36(%3)\n" + " movnti %%eax, %%es:32(%3)\n" + " movnti %%edx, %%es:36(%3)\n" "12: movl 40(%4), %%eax\n" "71: movl 44(%4), %%edx\n" - " movnti %%eax, 40(%3)\n" - " movnti %%edx, 44(%3)\n" + " movnti %%eax, %%es:40(%3)\n" + " movnti %%edx, %%es:44(%3)\n" "13: movl 48(%4), %%eax\n" "81: movl 52(%4), %%edx\n" - " movnti %%eax, 48(%3)\n" - " movnti %%edx, 52(%3)\n" + " movnti %%eax, %%es:48(%3)\n" + " movnti %%edx, %%es:52(%3)\n" "14: movl 56(%4), %%eax\n" "91: movl 60(%4), %%edx\n" - " movnti %%eax, 56(%3)\n" - " movnti %%edx, 60(%3)\n" + " movnti %%eax, %%es:56(%3)\n" + " movnti %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -586,6 +719,8 @@ " movl %%eax,%0\n" "7: rep; movsb\n" "8:\n" + " pushl %%ss\n" + " popl %%ds\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" "16: jmp 8b\n" @@ -614,7 +749,7 @@ " .long 7b,16b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -627,90 +762,146 @@ */ unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size); 
-unsigned long __copy_user_intel(void __user *to, const void *from, +unsigned long __generic_copy_to_user_intel(void __user *to, const void *from, + unsigned long size); +unsigned long __generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size); unsigned long __copy_user_zeroing_intel_nocache(void *to, const void __user *from, unsigned long size); #endif /* CONFIG_X86_INTEL_USERCOPY */ /* Generic arbitrary sized copy. */ -#define __copy_user(to,from,size) \ -do { \ - int __d0, __d1, __d2; \ - __asm__ __volatile__( \ - " cmp $7,%0\n" \ - " jbe 1f\n" \ - " movl %1,%0\n" \ - " negl %0\n" \ - " andl $7,%0\n" \ - " subl %0,%3\n" \ - "4: rep; movsb\n" \ - " movl %3,%0\n" \ - " shrl $2,%0\n" \ - " andl $3,%3\n" \ - " .align 2,0x90\n" \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "5: addl %3,%0\n" \ - " jmp 2b\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 4b,5b\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ - : "3"(size), "0"(size), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -#define __copy_user_zeroing(to,from,size) \ -do { \ - int __d0, __d1, __d2; \ - __asm__ __volatile__( \ - " cmp $7,%0\n" \ - " jbe 1f\n" \ - " movl %1,%0\n" \ - " negl %0\n" \ - " andl $7,%0\n" \ - " subl %0,%3\n" \ - "4: rep; movsb\n" \ - " movl %3,%0\n" \ - " shrl $2,%0\n" \ - " andl $3,%3\n" \ - " .align 2,0x90\n" \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "5: addl %3,%0\n" \ - " jmp 6f\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - "6: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 4b,5b\n" \ - " .long 0b,3b\n" \ - " .long 
1b,6b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ - : "3"(size), "0"(size), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) +static unsigned long +__generic_copy_to_user(void __user *to, const void *from, unsigned long size) +{ + int __d0, __d1, __d2; + + __asm__ __volatile__( + " movw %w8,%%es\n" + " cmp $7,%0\n" + " jbe 1f\n" + " movl %1,%0\n" + " negl %0\n" + " andl $7,%0\n" + " subl %0,%3\n" + "4: rep; movsb\n" + " movl %3,%0\n" + " shrl $2,%0\n" + " andl $3,%3\n" + " .align 2,0x90\n" + "0: rep; movsl\n" + " movl %3,%0\n" + "1: rep; movsb\n" + "2:\n" + " pushl %%ss\n" + " popl %%es\n" + ".section .fixup,\"ax\"\n" + "5: addl %3,%0\n" + " jmp 2b\n" + "3: lea 0(%3,%0,4),%0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 4b,5b\n" + " .long 0b,3b\n" + " .long 1b,2b\n" + ".previous" + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) + : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) + : "memory"); + return size; +} + +static unsigned long +__generic_copy_from_user(void *to, const void __user *from, unsigned long size) +{ + int __d0, __d1, __d2; + + __asm__ __volatile__( + " movw %w8,%%ds\n" + " cmp $7,%0\n" + " jbe 1f\n" + " movl %1,%0\n" + " negl %0\n" + " andl $7,%0\n" + " subl %0,%3\n" + "4: rep; movsb\n" + " movl %3,%0\n" + " shrl $2,%0\n" + " andl $3,%3\n" + " .align 2,0x90\n" + "0: rep; movsl\n" + " movl %3,%0\n" + "1: rep; movsb\n" + "2:\n" + " pushl %%ss\n" + " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "5: addl %3,%0\n" + " jmp 2b\n" + "3: lea 0(%3,%0,4),%0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 4b,5b\n" + " .long 0b,3b\n" + " .long 1b,2b\n" + ".previous" + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) + : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) + : "memory"); + return size; +} + +static unsigned long +__copy_user_zeroing(void *to, const void __user *from, unsigned long size) +{ + int 
__d0, __d1, __d2; + + __asm__ __volatile__( + " movw %w8,%%ds\n" + " cmp $7,%0\n" + " jbe 1f\n" + " movl %1,%0\n" + " negl %0\n" + " andl $7,%0\n" + " subl %0,%3\n" + "4: rep; movsb\n" + " movl %3,%0\n" + " shrl $2,%0\n" + " andl $3,%3\n" + " .align 2,0x90\n" + "0: rep; movsl\n" + " movl %3,%0\n" + "1: rep; movsb\n" + "2:\n" + " pushl %%ss\n" + " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "5: addl %3,%0\n" + " jmp 6f\n" + "3: lea 0(%3,%0,4),%0\n" + "6: pushl %0\n" + " pushl %%eax\n" + " xorl %%eax,%%eax\n" + " rep; stosb\n" + " popl %%eax\n" + " popl %0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 4b,5b\n" + " .long 0b,3b\n" + " .long 1b,6b\n" + ".previous" + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) + : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) + : "memory"); + return size; +} unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) @@ -766,9 +957,9 @@ } #endif if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); + n = __generic_copy_to_user(to, from, n); else - n = __copy_user_intel(to, from, n); + n = __generic_copy_to_user_intel(to, from, n); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -778,7 +969,7 @@ { BUG_ON((long)n < 0); if (movsl_is_ok(to, from, n)) - __copy_user_zeroing(to, fr