diff -Naur linux-2.6.19.2.orig/Documentation/dontdiff linux-2.6.19.2/Documentation/dontdiff --- linux-2.6.19.2.orig/Documentation/dontdiff 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/Documentation/dontdiff 2007-01-12 23:27:23.000000000 +0000 @@ -55,7 +55,7 @@ aicasm aicdb.h* asm -asm-offsets.* +asm-offsets.h asm_offsets.* autoconf.h* bbootsect @@ -143,4 +143,5 @@ vsyscall.lds wanxlfw.inc uImage +utsrelease.h zImage diff -Naur linux-2.6.19.2.orig/Makefile linux-2.6.19.2/Makefile --- linux-2.6.19.2.orig/Makefile 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/Makefile 2007-01-12 23:27:23.000000000 +0000 @@ -313,7 +313,7 @@ CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) -CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ +CFLAGS := -Wall -W -Wno-unused -Wno-sign-compare -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common AFLAGS := -D__ASSEMBLY__ @@ -559,7 +559,7 @@ ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff -Naur linux-2.6.19.2.orig/arch/alpha/kernel/module.c linux-2.6.19.2/arch/alpha/kernel/module.c --- linux-2.6.19.2.orig/arch/alpha/kernel/module.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/alpha/kernel/module.c 2007-01-12 23:27:20.000000000 +0000 @@ -177,7 +177,7 @@ /* The small sections were sorted to the end of the segment. The following should definitely cover them. */ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->module_core_rw + me->core_size_rw - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff -Naur linux-2.6.19.2.orig/arch/alpha/kernel/osf_sys.c linux-2.6.19.2/arch/alpha/kernel/osf_sys.c --- linux-2.6.19.2.orig/arch/alpha/kernel/osf_sys.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/alpha/kernel/osf_sys.c 2007-01-12 23:27:20.000000000 +0000 @@ -1277,6 +1277,10 @@ merely specific addresses, but regions of memory -- perhaps this feature should be incorporated into all ports? */ +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != (unsigned long) -ENOMEM) @@ -1284,8 +1288,8 @@ } /* Next, try allocating at TASK_UNMAPPED_BASE. */ - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), - len, limit); + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(current->mm->mmap_base), len, limit); + if (addr != (unsigned long) -ENOMEM) return addr; diff -Naur linux-2.6.19.2.orig/arch/alpha/kernel/ptrace.c linux-2.6.19.2/arch/alpha/kernel/ptrace.c --- linux-2.6.19.2.orig/arch/alpha/kernel/ptrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/alpha/kernel/ptrace.c 2007-01-12 23:27:20.000000000 +0000 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -283,6 +284,9 @@ goto out_notsk; } + if (gr_handle_ptrace(child, request)) + goto out; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -Naur linux-2.6.19.2.orig/arch/alpha/mm/fault.c linux-2.6.19.2/arch/alpha/mm/fault.c --- linux-2.6.19.2.orig/arch/alpha/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/alpha/mm/fault.c 2007-01-12 23:27:20.000000000 +0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,124 @@ __reload_thread(pcb); } +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int ldah, ldq, jmp; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(ldq, (unsigned int *)(regs->pc+4)); + err |= get_user(jmp, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (ldq & 0xFFFF0000U) == 0xA77B0000U && + jmp == 0x6BFB0000U) + { + unsigned long r27, addr; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL; + + addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + err = get_user(r27, (unsigned long*)addr); + if (err) + break; + + regs->r27 = r27; + regs->pc = r27; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ldah, lda, br; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(lda, (unsigned int *)(regs->pc+4)); + err |= get_user(br, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (lda & 0xFFFF0000U) == 0xA77B0000U && + (br & 0xFFE00000U) == 0xC3E00000U) + { + unsigned long addr = br | 0xFFFFFFFFFFE00000UL; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; + + regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation */ + unsigned int br; + + err = get_user(br, (unsigned int *)regs->pc); + + if (!err && (br & 0xFFE00000U) == 0xC3800000U) { + unsigned int br2, ldq, nop, jmp; + unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; + + addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + err = get_user(br2, (unsigned int *)addr); + err |= get_user(ldq, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + err |= get_user(jmp, (unsigned int *)(addr+12)); + err |= get_user(resolver, (unsigned long *)(addr+16)); + + if (err) + break; + + if (br2 == 0xC3600000U && + ldq == 0xA77B000CU && + nop == 0x47FF041FU && + jmp == 0x6B7B0000U) + { + regs->r28 = regs->pc+4; + regs->r27 = addr+16; + regs->pc = resolver; + return 3; + } + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif /* * This routine handles page faults. It determines the address, @@ -132,8 +251,29 @@ good_area: si_code = SEGV_ACCERR; if (cause < 0) { - if (!(vma->vm_flags & VM_EXEC)) + if (!(vma->vm_flags & VM_EXEC)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc) + goto bad_area; + + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->pc, (void*)rdusp()); + do_exit(SIGKILL); +#else goto bad_area; +#endif + + } } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) diff -Naur linux-2.6.19.2.orig/arch/arm/mm/mmap.c linux-2.6.19.2/arch/arm/mm/mmap.c --- linux-2.6.19.2.orig/arch/arm/mm/mmap.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/arm/mm/mmap.c 2007-01-12 23:27:21.000000000 +0000 @@ -61,6 +61,10 @@ if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { if (do_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -75,7 +79,7 @@ if (len > mm->cached_hole_size) { start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -92,8 +96,8 @@ * Start a new search - just in case we missed * some holes. */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } diff -Naur linux-2.6.19.2.orig/arch/i386/Kconfig linux-2.6.19.2/arch/i386/Kconfig --- linux-2.6.19.2.orig/arch/i386/Kconfig 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/Kconfig 2007-01-12 23:27:21.000000000 +0000 @@ -803,7 +803,7 @@ config COMPAT_VDSO bool "Compat VDSO support" - default y + default n depends on !PARAVIRT help Map the VDSO to the predictable old-style address too. @@ -999,7 +999,7 @@ choice prompt "PCI access mode" depends on PCI && !X86_VISWS - default PCI_GOANY + default PCI_GODIRECT ---help--- On PCI systems, the BIOS can be used to detect the PCI devices and determine their configuration. However, some old PCI motherboards @@ -1031,7 +1031,7 @@ config PCI_BIOS bool - depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) + depends on !X86_VISWS && PCI && PCI_GOBIOS default y config PCI_DIRECT diff -Naur linux-2.6.19.2.orig/arch/i386/Kconfig.cpu linux-2.6.19.2/arch/i386/Kconfig.cpu --- linux-2.6.19.2.orig/arch/i386/Kconfig.cpu 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/Kconfig.cpu 2007-01-12 23:27:21.000000000 +0000 @@ -252,7 +252,7 @@ config X86_F00F_BUG bool - depends on M586MMX || M586TSC || M586 || M486 || M386 + depends on (M586MMX || M586TSC || M586 || M486 || M386) && !PAX_KERNEXEC default y config X86_WP_WORKS_OK @@ -282,7 +282,7 @@ config X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK8 || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 default y config X86_GOOD_APIC diff -Naur linux-2.6.19.2.orig/arch/i386/Kconfig.debug linux-2.6.19.2/arch/i386/Kconfig.debug --- linux-2.6.19.2.orig/arch/i386/Kconfig.debug 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/Kconfig.debug 2007-01-12 23:27:21.000000000 +0000 @@ -48,7 +48,7 @@ config DEBUG_RODATA bool "Write protect kernel read-only data structures" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !PAX_KERNEXEC help Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const diff -Naur linux-2.6.19.2.orig/arch/i386/boot/compressed/head.S linux-2.6.19.2/arch/i386/boot/compressed/head.S --- linux-2.6.19.2.orig/arch/i386/boot/compressed/head.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/boot/compressed/head.S 2007-01-12 23:27:21.000000000 +0000 @@ -39,11 +39,13 @@ movl %eax,%gs lss stack_start,%esp + movl 0x000000,%ecx xorl %eax,%eax 1: incl %eax # check that A20 really IS enabled movl %eax,0x000000 # loop forever if it isn't cmpl %eax,0x100000 je 1b + movl %ecx,0x000000 /* * Initialize eflags. Some BIOS's leave bits like NT set. This would diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/alternative.c linux-2.6.19.2/arch/i386/kernel/alternative.c --- linux-2.6.19.2.orig/arch/i386/kernel/alternative.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/alternative.c 2007-01-12 23:27:21.000000000 +0000 @@ -3,6 +3,7 @@ #include #include #include +#include static int no_replacement = 0; static int smp_alt_once = 0; @@ -142,6 +143,12 @@ u8 *instr; int diff, i, k; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { BUG_ON(a->replacementlen > a->instrlen); @@ -156,16 +163,21 @@ __FUNCTION__, a->instr, instr); } #endif - memcpy(instr, a->replacement, a->replacementlen); + memcpy(instr + __KERNEL_TEXT_OFFSET, a->replacement, a->replacementlen); diff = a->instrlen - a->replacementlen; /* Pad the rest with nops */ for (i = a->replacementlen; diff > 0; diff -= k, i += k) { k = diff; if (k > ASM_NOP_MAX) k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); + memcpy(a->instr + i + __KERNEL_TEXT_OFFSET, noptable[k], k); } } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } #ifdef CONFIG_SMP @@ -174,50 +186,96 @@ { struct alt_instr *a; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { memcpy(a->replacement + a->replacementlen, - a->instr, + a->instr + __KERNEL_TEXT_OFFSET, a->instrlen); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) { struct alt_instr *a; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + for (a = start; a < end; a++) { - memcpy(a->instr, + memcpy(a->instr + __KERNEL_TEXT_OFFSET, a->replacement + a->replacementlen, a->instrlen); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) { - u8 **ptr; + u8 *ptr; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) + pax_open_kernel(cr0); +#endif + + for (; start < end; start++) { + ptr = *start + __KERNEL_TEXT_OFFSET; + if (ptr < text) continue; - if (*ptr > text_end) + if (ptr > text_end) continue; - **ptr = 0xf0; /* lock prefix */ + *ptr = 0xf0; /* lock prefix */ }; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { unsigned char **noptable = find_nop_table(); - u8 **ptr; + u8 *ptr; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) + for (; start < end; start++) { + ptr = *start + __KERNEL_TEXT_OFFSET; + if (ptr < text) continue; - if (*ptr > text_end) + if (ptr > text_end) continue; - **ptr = noptable[1][0]; + *ptr = noptable[1][0]; }; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } struct smp_alt_module { diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/apic.c linux-2.6.19.2/arch/i386/kernel/apic.c --- linux-2.6.19.2.orig/arch/i386/kernel/apic.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/apic.c 2007-01-12 23:27:21.000000000 +0000 @@ -1197,7 +1197,7 @@ { profile_tick(CPU_PROFILING); #ifdef CONFIG_SMP - update_process_times(user_mode_vm(get_irq_regs())); + update_process_times(user_mode(get_irq_regs())); #endif /* diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/apm.c linux-2.6.19.2/arch/i386/kernel/apm.c --- linux-2.6.19.2.orig/arch/i386/kernel/apm.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/apm.c 2007-01-12 23:27:21.000000000 +0000 @@ -235,7 +235,7 @@ #include "io_ports.h" extern unsigned long get_cmos_time(void); -extern void machine_real_restart(unsigned char *, int); +extern void machine_real_restart(const unsigned char *, unsigned int); #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -608,9 +608,18 @@ struct desc_struct save_desc_40; struct desc_struct *gdt; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + cpus = apm_save_cpus(); cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; @@ -621,6 +630,11 @@ APM_DO_RESTORE_SEGS; apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); apm_restore_cpus(cpus); @@ -651,9 +665,18 @@ struct desc_struct save_desc_40; struct desc_struct *gdt; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + cpus = apm_save_cpus(); cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; @@ -664,6 +687,11 @@ APM_DO_RESTORE_SEGS; apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); apm_restore_cpus(cpus); return error; @@ -931,7 +959,7 @@ static void apm_power_off(void) { - unsigned char po_bios_call[] = { + const unsigned char po_bios_call[] = { 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ 0x8e, 0xd0, /* movw ax,ss */ 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/asm-offsets.c linux-2.6.19.2/arch/i386/kernel/asm-offsets.c --- linux-2.6.19.2.orig/arch/i386/kernel/asm-offsets.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/asm-offsets.c 2007-01-12 23:27:21.000000000 +0000 @@ -71,6 +71,7 @@ sizeof(struct tss_struct)); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(PTRS_PER_PTE_asm, PTRS_PER_PTE); DEFINE(VDSO_PRELINK, VDSO_PRELINK); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/cpu/common.c linux-2.6.19.2/arch/i386/kernel/cpu/common.c --- linux-2.6.19.2.orig/arch/i386/kernel/cpu/common.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/cpu/common.c 2007-01-12 23:27:21.000000000 +0000 @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -21,16 +20,18 @@ #include "cpu.h" -DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); -EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); - DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); static int cachesize_override __cpuinitdata = -1; static int disable_x86_fxsr __cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_KERNEXEC) +static int disable_x86_sep __cpuinitdata = 1; +#else static int disable_x86_sep __cpuinitdata; +#endif struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -600,11 +601,10 @@ void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); + struct tss_struct * t = init_tss + cpu; struct thread_struct *thread = ¤t->thread; - struct desc_struct *gdt; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -621,36 +621,12 @@ set_in_cr4(X86_CR4_TSD); } - /* The CPU hotplug case */ - if (cpu_gdt_descr->address) { - gdt = (struct desc_struct *)cpu_gdt_descr->address; - memset(gdt, 0, PAGE_SIZE); - goto old_gdt; - } - /* - * This is a horrible hack to allocate the GDT. The problem - * is that cpu_init() is called really early for the boot CPU - * (and hence needs bootmem) but much later for the secondary - * CPUs, when bootmem will have gone away - */ - if (NODE_DATA(0)->bdata->node_bootmem_map) { - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); - /* alloc_bootmem_pages panics on failure, so no check */ - memset(gdt, 0, PAGE_SIZE); - } else { - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); - if (unlikely(!gdt)) { - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); - for (;;) - local_irq_enable(); - } - } -old_gdt: /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - memcpy(gdt, cpu_gdt_table, GDT_SIZE); + if (cpu) + memcpy(gdt, cpu_gdt_table, GDT_SIZE); /* Set up GDT entry for 16bit stack */ *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= @@ -658,10 +634,10 @@ ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | (CPU_16BIT_STACK_SIZE - 1); - cpu_gdt_descr->size = GDT_SIZE - 1; - cpu_gdt_descr->address = (unsigned long)gdt; + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; + cpu_gdt_descr[cpu].address = (unsigned long)gdt; - load_gdt(cpu_gdt_descr); + load_gdt(&cpu_gdt_descr[cpu]); load_idt(&idt_descr); /* diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/crash.c linux-2.6.19.2/arch/i386/kernel/crash.c --- linux-2.6.19.2.orig/arch/i386/kernel/crash.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/crash.c 2007-01-12 23:27:21.000000000 +0000 @@ -117,7 +117,7 @@ return NOTIFY_STOP; local_irq_disable(); - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/doublefault.c linux-2.6.19.2/arch/i386/kernel/doublefault.c --- linux-2.6.19.2.orig/arch/i386/kernel/doublefault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/doublefault.c 2007-01-12 23:27:21.000000000 +0000 @@ -11,7 +11,7 @@ #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) +#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE-2) #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000) @@ -57,10 +57,10 @@ .eip = (unsigned long) doublefault_fn, .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */ .esp = STACK_START, - .es = __USER_DS, + .es = __KERNEL_DS, .cs = __KERNEL_CS, .ss = __KERNEL_DS, - .ds = __USER_DS, + .ds = __KERNEL_DS, .__cr3 = __pa(swapper_pg_dir) }; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/efi.c linux-2.6.19.2/arch/i386/kernel/efi.c --- linux-2.6.19.2.orig/arch/i386/kernel/efi.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/efi.c 2007-01-12 23:27:21.000000000 +0000 @@ -63,82 +63,43 @@ static unsigned long efi_rt_eflags; static DEFINE_SPINLOCK(efi_rt_lock); -static pgd_t efi_bak_pg_dir_pointer[2]; +static pgd_t __initdata efi_bak_pg_dir_pointer[KERNEL_PGD_PTRS] __attribute__ ((aligned (4096))); static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) { - unsigned long cr4; - unsigned long temp; - struct Xgt_desc_struct *cpu_gdt_descr; - spin_lock(&efi_rt_lock); local_irq_save(efi_rt_eflags); - cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - - /* - * If I don't have PSE, I should just duplicate two entries in page - * directory. If I have PSE, I just need to duplicate one entry in - * page directory. - */ - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - swapper_pg_dir[0].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - } else { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - efi_bak_pg_dir_pointer[1].pgd = - swapper_pg_dir[pgd_index(0x400000)].pgd; - swapper_pg_dir[pgd_index(0)].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - temp = PAGE_OFFSET + 0x400000; - swapper_pg_dir[pgd_index(0x400000)].pgd = - swapper_pg_dir[pgd_index(temp)].pgd; - } + clone_pgd_range(efi_bak_pg_dir_pointer, swapper_pg_dir, KERNEL_PGD_PTRS); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + USER_PGD_PTRS >= KERNEL_PGD_PTRS ? KERNEL_PGD_PTRS : USER_PGD_PTRS); /* * After the lock is released, the original page table is restored. */ - local_flush_tlb(); + __flush_tlb_all(); - cpu_gdt_descr->address = __pa(cpu_gdt_descr->address); - load_gdt(cpu_gdt_descr); + cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); + load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])); } static void efi_call_phys_epilog(void) __releases(efi_rt_lock) { - unsigned long cr4; - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - - cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); - load_gdt(cpu_gdt_descr); + cpu_gdt_descr[0].address = (unsigned long) __va(cpu_gdt_descr[0].address); + load_gdt(&cpu_gdt_descr[0]); - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - } else { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - swapper_pg_dir[pgd_index(0x400000)].pgd = - efi_bak_pg_dir_pointer[1].pgd; - } + clone_pgd_range(swapper_pg_dir, efi_bak_pg_dir_pointer, KERNEL_PGD_PTRS); /* * After the lock is released, the original page table is restored. */ - local_flush_tlb(); + __flush_tlb_all(); local_irq_restore(efi_rt_eflags); spin_unlock(&efi_rt_lock); } -static efi_status_t +static efi_status_t __init phys_efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, u32 descriptor_version, @@ -154,7 +115,7 @@ return status; } -static efi_status_t +static efi_status_t __init phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { efi_status_t status; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/efi_stub.S linux-2.6.19.2/arch/i386/kernel/efi_stub.S --- linux-2.6.19.2.orig/arch/i386/kernel/efi_stub.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/efi_stub.S 2007-01-12 23:27:21.000000000 +0000 @@ -6,6 +6,7 @@ */ #include +#include #include /* @@ -20,7 +21,7 @@ * service functions will comply with gcc calling convention, too. */ -.text +__INIT ENTRY(efi_call_phys) /* * 0. The function can only be called in Linux kernel. So CS has been @@ -36,9 +37,7 @@ * The mapping of lower virtual memory has been created in prelog and * epilog. */ - movl $1f, %edx - subl $__PAGE_OFFSET, %edx - jmp *%edx + jmp 1f-__PAGE_OFFSET 1: /* @@ -47,14 +46,8 @@ * parameter 2, ..., param n. To make things easy, we save the return * address of efi_call_phys in a global variable. */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx + popl (saved_return_addr) + popl (efi_rt_function_ptr) /* * 3. Clear PG bit in %CR0. @@ -73,9 +66,8 @@ /* * 5. Call the physical function. */ - jmp *%ecx + call *(efi_rt_function_ptr-__PAGE_OFFSET) -2: /* * 6. After EFI runtime service returns, control will return to * following instruction. We'd better readjust stack pointer first. @@ -85,37 +77,29 @@ /* * 7. Restore PG bit */ - movl %cr0, %edx - orl $0x80000000, %edx - movl %edx, %cr0 - jmp 1f -1: /* * 8. Now restore the virtual mode from flat mode by * adding EIP with PAGE_OFFSET. */ - movl $1f, %edx - jmp *%edx + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 + jmp 1f+__PAGE_OFFSET 1: /* * 9. Balance the stack. And because EAX contain the return value, * we'd better not clobber it. */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx + pushl (efi_rt_function_ptr) /* - * 10. Push the saved return address onto the stack and return. + * 10. Return to the saved return address. */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret + jmpl *(saved_return_addr) .previous -.data +__INITDATA saved_return_addr: .long 0 efi_rt_function_ptr: diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/entry.S linux-2.6.19.2/arch/i386/kernel/entry.S --- linux-2.6.19.2.orig/arch/i386/kernel/entry.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/entry.S 2007-01-12 23:27:21.000000000 +0000 @@ -82,6 +82,8 @@ #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit #define INTERRUPT_RETURN iret #define GET_CR0_INTO_EAX movl %cr0, %eax +#define GET_CR0_INTO_EDX movl %cr0, %edx +#define SET_CR0_FROM_EDX movl %edx, %cr0 #ifdef CONFIG_PREEMPT #define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF @@ -105,7 +107,7 @@ #define resume_userspace_sig resume_userspace #endif -#define SAVE_ALL \ +#define __SAVE_ALL(_DS) \ cld; \ pushl %es; \ CFI_ADJUST_CFA_OFFSET 4;\ @@ -134,10 +136,24 @@ pushl %ebx; \ CFI_ADJUST_CFA_OFFSET 4;\ CFI_REL_OFFSET ebx, 0;\ - movl $(__USER_DS), %edx; \ + movl $(_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; +#ifdef CONFIG_PAX_KERNEXEC +#define SAVE_ALL \ + __SAVE_ALL(__KERNEL_DS) \ + GET_CR0_INTO_EDX; \ + movl %edx, %esi; \ + orl $0x10000, %edx; \ + xorl %edx, %esi; \ + SET_CR0_FROM_EDX +#elif defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) +#define SAVE_ALL __SAVE_ALL(__KERNEL_DS) +#else +#define SAVE_ALL __SAVE_ALL(__USER_DS) +#endif + #define RESTORE_INT_REGS \ popl %ebx; \ CFI_ADJUST_CFA_OFFSET -4;\ @@ -245,7 +261,18 @@ movb CS(%esp), %al andl $(VM_MASK | SEGMENT_RPL_MASK), %eax cmpl $USER_RPL, %eax + +#ifdef CONFIG_PAX_KERNEXEC + jae resume_userspace + + GET_CR0_INTO_EDX + xorl %esi, %edx + SET_CR0_FROM_EDX + jmp resume_kernel +#else jb resume_kernel # not returning to v8086 or userspace +#endif + ENTRY(resume_userspace) DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending @@ -301,10 +328,9 @@ /*CFI_REL_OFFSET cs, 0*/ /* * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary - 4*4 means the 4 words - * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) + GET_THREAD_INFO(%ebp) + pushl TI_sysenter_return(%ebp) CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 @@ -312,9 +338,20 @@ * Load the potential sixth argument from user stack. * Careful about security. */ + movl 12(%esp),%ebp + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl $(__USER_DS) + CFI_ADJUST_CFA_OFFSET 4 + pop %ds + CFI_ADJUST_CFA_OFFSET -4 +1: movl %ds:(%ebp),%ebp +#else cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault 1: movl (%ebp),%ebp +#endif + .section __ex_table,"a" .align 4 .long 1b,syscall_fault @@ -337,14 +374,36 @@ movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_PAX_RANDKSTACK + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + call pax_randomize_kstack + popl %eax + CFI_ADJUST_CFA_OFFSET -4 +#endif + /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON +1: mov DS(%esp), %ds +2: mov ES(%esp), %es ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC +.section .fixup,"ax" +3: movl $0,DS(%esp) + jmp 1b +4: movl $0,ES(%esp) + jmp 2b +.previous +.section __ex_table,"a" + .align 4 + .long 1b,3b + .long 2b,4b +.previous # system call handler stub ENTRY(system_call) @@ -375,6 +434,10 @@ testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work +#ifdef CONFIG_PAX_RANDKSTACK + call pax_randomize_kstack +#endif + restore_all: movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS # Warning: OLDSS(%esp) contains the wrong/random values if we @@ -553,7 +616,7 @@ * Build the entry stubs and pointer table with * some assembler magic. */ -.data +.section .rodata,"a",@progbits ENTRY(interrupt) .text @@ -568,7 +631,7 @@ 1: pushl $~(vector) CFI_ADJUST_CFA_OFFSET 4 jmp common_interrupt -.data +.section .rodata,"a",@progbits .long 1b .text vector=vector+1 @@ -642,12 +705,21 @@ popl %ecx CFI_ADJUST_CFA_OFFSET -4 /*CFI_REGISTER es, ecx*/ + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_EDX + movl %edx, %esi + orl $0x10000, %edx + xorl %edx, %esi + SET_CR0_FROM_EDX +#endif + movl ES(%esp), %edi # get the function address movl ORIG_EAX(%esp), %edx # get the error code movl %eax, ORIG_EAX(%esp) movl %ecx, ES(%esp) /*CFI_REL_OFFSET es, ES*/ - movl $(__USER_DS), %ecx + movl $(__KERNEL_DS), %ecx movl %ecx, %ds movl %ecx, %es movl %esp,%eax # pt_regs pointer @@ -778,6 +850,13 @@ xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_EDX + xorl %esi, %edx + SET_CR0_FROM_EDX +#endif + jmp restore_nocheck_notrace CFI_ENDPROC @@ -820,6 +899,13 @@ CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved xorl %edx,%edx # zero error code call do_nmi + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_EDX + xorl %esi, %edx + SET_CR0_FROM_EDX +#endif + RESTORE_REGS lss 12+4(%esp), %esp # back to 16bit stack 1: INTERRUPT_RETURN @@ -957,8 +1043,8 @@ movl %edi, EDI(%edx) movl %ebp, EBP(%edx) movl %ebx, EAX(%edx) - movl $__USER_DS, DS(%edx) - movl $__USER_DS, ES(%edx) + movl $__KERNEL_DS, DS(%edx) + movl $__KERNEL_DS, ES(%edx) movl %ebx, ORIG_EAX(%edx) movl %ecx, EIP(%edx) movl 12(%esp), %ecx @@ -987,7 +1073,6 @@ CFI_ENDPROC ENDPROC(kernel_thread_helper) -.section .rodata,"a" #include "syscall_table.S" syscall_table_size=(.-sys_call_table) diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/head.S linux-2.6.19.2/arch/i386/kernel/head.S --- linux-2.6.19.2.orig/arch/i386/kernel/head.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/head.S 2007-01-12 23:27:21.000000000 +0000 @@ -45,6 +45,16 @@ */ #define INIT_MAP_BEYOND_END (128*1024) +#ifdef CONFIG_PAX_KERNEXEC +/* PaX: fill first page in .text with int3 to catch NULL derefs in kernel mode */ +.fill 4096,1,0xcc +#endif + +/* + * Real beginning of normal "text" segment + */ +ENTRY(stext) +ENTRY(_stext) /* * 32-bit kernel entrypoint; only used by the boot CPU. On entry, @@ -66,6 +76,36 @@ movl %eax,%fs movl %eax,%gs +#ifdef CONFIG_PAX_MEMORY_UDEREF + /* check for VMware */ + movl $0x564d5868,%eax + xorl %ebx,%ebx + movl $0xa,%ecx + movl $0x5658,%edx + in (%dx),%eax + cmpl $0x564d5868,%ebx + jz 1f + + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c09700),%eax + movl %eax,(cpu_gdt_table - __PAGE_OFFSET + GDT_ENTRY_KERNEL_DS * 8 + 4) + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c0f300),%eax + movl %eax,(cpu_gdt_table - __PAGE_OFFSET + GDT_ENTRY_DEFAULT_USER_DS * 8 + 4) +1: +#endif + +#ifdef CONFIG_PAX_KERNEXEC + movl $ __KERNEL_TEXT_OFFSET,%eax + movw %ax,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 2) + rorl $16,%eax + movb %al,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 4) + movb %ah,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 7) + + movb %al,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 4) + movb %ah,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 7) + rorl $16,%eax + movw %ax,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 2) +#endif + /* * Clear BSS first so that there are no surprises... * No need to cld as DF is already clear from cld above... @@ -113,24 +153,42 @@ * Warning: don't use %esi or the stack in this code. However, %esp * can be used as a GPR if you really need it... */ -page_pde_offset = (__PAGE_OFFSET >> 20); - +#ifdef CONFIG_X86_PAE +page_pde_offset = ((__PAGE_OFFSET >> 21) * (4096 / PTRS_PER_PTE_asm)); +#else +page_pde_offset = ((__PAGE_OFFSET >> 22) * (4096 / PTRS_PER_PTE_asm)); +#endif movl $(pg0 - __PAGE_OFFSET), %edi +#ifdef CONFIG_X86_PAE + movl $(swapper_pm_dir - __PAGE_OFFSET), %edx +#else movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ +#endif + movl $0x063, %eax /* 0x063 = DIRTY+ACCESSED+PRESENT+RW */ 10: - leal 0x007(%edi),%ecx /* Create PDE entry */ + leal 0x063(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ +#ifdef CONFIG_X86_PAE + movl $0,4(%edx) + movl $0,page_pde_offset+4(%edx) + addl $8,%edx + movl $512, %ecx +#else addl $4,%edx movl $1024, %ecx +#endif 11: stosl +#ifdef CONFIG_X86_PAE + movl $0,(%edi) + addl $4,%edi +#endif addl $0x1000,%eax loop 11b /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ - /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ - leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp + /* bytes beyond the end of our own page tables; the +0x063 is the attribute bits */ + leal (INIT_MAP_BEYOND_END+0x063)(%edi),%ebp cmpl %ebp,%eax jb 10b movl %edi,(init_pg_tables_end - __PAGE_OFFSET) @@ -153,6 +211,11 @@ movl %eax,%fs movl %eax,%gs + /* This is a secondary processor (AP) */ + xorl %ebx,%ebx + incl %ebx +#endif /* CONFIG_SMP */ + /* * New page tables may be in 4Mbyte page mode and may * be using the global pages. @@ -168,26 +231,27 @@ * not yet offset PAGE_OFFSET.. */ #define cr4_bits mmu_cr4_features-__PAGE_OFFSET +3: movl cr4_bits,%edx andl %edx,%edx - jz 6f + jz 5f movl %cr4,%eax # Turn on paging options (PSE,PAE,..) orl %edx,%eax movl %eax,%cr4 - btl $5, %eax # check if PAE is enabled - jnc 6f +#ifdef CONFIG_X86_PAE + movl %ebx,%edi /* Check if extended functions are implemented */ movl $0x80000000, %eax cpuid cmpl $0x80000000, %eax - jbe 6f + jbe 4f mov $0x80000001, %eax cpuid /* Execute Disable bit supported? */ btl $20, %edx - jnc 6f + jnc 4f /* Setup EFER (Extended Feature Enable Register) */ movl $0xc0000080, %ecx @@ -196,14 +260,12 @@ btsl $11, %eax /* Make changes effective */ wrmsr + btsl $63,__supported_pte_mask-__PAGE_OFFSET -6: - /* This is a secondary processor (AP) */ - xorl %ebx,%ebx - incl %ebx - -3: -#endif /* CONFIG_SMP */ +4: + movl %edi,%ebx +#endif +5: /* * Enable paging @@ -228,9 +290,7 @@ #ifdef CONFIG_SMP andl %ebx,%ebx - jz 1f /* Initial CPU cleans BSS */ - jmp checkCPUtype -1: + jnz checkCPUtype /* Initial CPU cleans BSS */ #endif /* CONFIG_SMP */ /* @@ -307,8 +367,6 @@ ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. - - movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds movl %eax,%es @@ -433,8 +491,8 @@ /* This is the default interrupt "handler" :-) */ ALIGN ignore_int: - cld #ifdef CONFIG_PRINTK + cld pushl %eax pushl %ecx pushl %edx @@ -465,32 +523,50 @@ #endif iret -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - -/* - * BSS section - */ -.section ".bss.page_aligned","w" +.section .swapper_pg_dir,"a",@progbits ENTRY(swapper_pg_dir) +#ifdef CONFIG_X86_PAE + .long swapper_pm_dir-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*8-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*16-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*24-__PAGE_OFFSET+1 + .long 0 +#else .fill 1024,4,0 +#endif + +#ifdef CONFIG_X86_PAE +.section .swapper_pm_dir,"a",@progbits +ENTRY(swapper_pm_dir) + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 +#endif + +.section .empty_zero_page,"a",@progbits ENTRY(empty_zero_page) .fill 4096,1,0 /* - * This starts the data section. - */ -.data + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. + */ +.section .idt,"a",@progbits +ENTRY(idt_table) + .fill 256,8,0 + +.section .rodata,"a",@progbits +ready: .byte 0 ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE + .long init_thread_union+THREAD_SIZE-8 .long __BOOT_DS -ready: .byte 0 - early_recursion_flag: .long 0 @@ -525,10 +601,12 @@ # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address -cpu_gdt_descr: +ENTRY(cpu_gdt_descr) .word GDT_ENTRIES*8-1 .long cpu_gdt_table + .fill NR_CPUS*8-6,1,0 # space for the other GDT descriptors + /* * The boot_gdt_table must mirror the equivalent in setup.S and is * used only for booting. @@ -536,13 +614,13 @@ .align L1_CACHE_BYTES ENTRY(boot_gdt_table) .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* kernel 4GB data at 0x00000000 */ /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ - .align L1_CACHE_BYTES + .align PAGE_SIZE_asm ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ @@ -557,10 +635,10 @@ .quad 0x0000000000000000 /* 0x53 reserved */ .quad 0x0000000000000000 /* 0x5b reserved */ - .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */ + .quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ @@ -570,24 +648,30 @@ * They code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. */ - .quad 0x00409a000000ffff /* 0x90 32-bit code */ - .quad 0x00009a000000ffff /* 0x98 16-bit code */ - .quad 0x000092000000ffff /* 0xa0 16-bit data */ - .quad 0x0000920000000000 /* 0xa8 16-bit data */ - .quad 0x0000920000000000 /* 0xb0 16-bit data */ + .quad 0x00409b000000ffff /* 0x90 32-bit code */ + .quad 0x00009b000000ffff /* 0x98 16-bit code */ + .quad 0x000093000000ffff /* 0xa0 16-bit data */ + .quad 0x0000930000000000 /* 0xa8 16-bit data */ + .quad 0x0000930000000000 /* 0xb0 16-bit data */ /* * The APM segments have byte granularity and their bases * are set at run time. All have 64k limits. */ - .quad 0x00409a000000ffff /* 0xb8 APM CS code */ - .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ - .quad 0x004092000000ffff /* 0xc8 APM DS data */ + .quad 0x00409b000000ffff /* 0xb8 APM CS code */ + .quad 0x00009b000000ffff /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x004093000000ffff /* 0xc8 APM DS data */ - .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ + .quad 0x0000930000000000 /* 0xd0 - ESPFIX 16-bit SS */ .quad 0x0000000000000000 /* 0xd8 - unused */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ + /* Be sure this is zeroed to avoid false validations in Xen */ + .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0 + +#ifdef CONFIG_SMP + .fill (NR_CPUS-1) * (PAGE_SIZE_asm / 8),8,0 /* other CPU's GDT */ +#endif diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/i386_ksyms.c linux-2.6.19.2/arch/i386/kernel/i386_ksyms.c --- linux-2.6.19.2.orig/arch/i386/kernel/i386_ksyms.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/i386_ksyms.c 2007-01-12 23:27:21.000000000 +0000 @@ -2,12 +2,16 @@ #include #include +EXPORT_SYMBOL_GPL(cpu_gdt_table); + EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(csum_partial_copy_generic_to_user); +EXPORT_SYMBOL(csum_partial_copy_generic_from_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/init_task.c linux-2.6.19.2/arch/i386/kernel/init_task.c --- linux-2.6.19.2.orig/arch/i386/kernel/init_task.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/init_task.c 2007-01-12 23:27:21.000000000 +0000 @@ -42,5 +42,5 @@ * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +struct tss_struct init_tss[NR_CPUS] ____cacheline_internodealigned_in_smp = { [0 ... NR_CPUS-1] = INIT_TSS }; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/io_apic.c linux-2.6.19.2/arch/i386/kernel/io_apic.c --- linux-2.6.19.2.orig/arch/i386/kernel/io_apic.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/io_apic.c 2007-01-12 23:27:21.000000000 +0000 @@ -350,8 +350,8 @@ # define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) # define Dprintk(x...) do { TDprintk(x); } while (0) # else -# define TDprintk(x...) -# define Dprintk(x...) +# define TDprintk(x...) do {} while (0) +# define Dprintk(x...) do {} while (0) # endif #define IRQBALANCE_CHECK_ARCH -999 diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/ioport.c linux-2.6.19.2/arch/i386/kernel/ioport.c --- linux-2.6.19.2.orig/arch/i386/kernel/ioport.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/ioport.c 2007-01-12 23:27:21.000000000 +0000 @@ -16,6 +16,7 @@ #include #include #include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) @@ -64,9 +65,16 @@ if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on) { + gr_handle_ioperm(); +#else if (turn_on && !capable(CAP_SYS_RAWIO)) +#endif return -EPERM; - +#ifdef CONFIG_GRKERNSEC_IO + } +#endif /* * If it's the first ioperm() call in this thread's lifetime, set the * IO bitmap up. ioperm() is much less timing critical than clone(), @@ -89,7 +97,7 @@ * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); @@ -143,8 +151,13 @@ return -EINVAL; /* Trying to gain more privileges? */ if (level > old) { +#ifdef CONFIG_GRKERNSEC_IO + gr_handle_iopl(); + return -EPERM; +#else if (!capable(CAP_SYS_RAWIO)) return -EPERM; +#endif } t->iopl = level << 12; regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/irq.c linux-2.6.19.2/arch/i386/kernel/irq.c --- linux-2.6.19.2.orig/arch/i386/kernel/irq.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/irq.c 2007-01-12 23:27:21.000000000 +0000 @@ -100,7 +100,7 @@ int arg1, arg2, ebx; /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)) - 2; irqctx->tinfo.task = curctx->tinfo.task; irqctx->tinfo.previous_esp = current_stack_pointer; @@ -137,10 +137,10 @@ * gcc's 3.0 and earlier don't handle that correctly. */ static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); /* * allocate per-cpu stacks for hardirq and for softirq processing @@ -200,7 +200,7 @@ irqctx->tinfo.previous_esp = current_stack_pointer; /* build the stack frame on the softirq stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)) - 2; asm volatile( " xchgl %%ebx,%%esp \n" diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/kprobes.c linux-2.6.19.2/arch/i386/kernel/kprobes.c --- linux-2.6.19.2.orig/arch/i386/kernel/kprobes.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/kprobes.c 2007-01-12 23:27:21.000000000 +0000 @@ -661,7 +661,7 @@ struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - if (args->regs && user_mode_vm(args->regs)) + if (args->regs && user_mode(args->regs)) return ret; switch (val) { diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/ldt.c linux-2.6.19.2/arch/i386/kernel/ldt.c --- linux-2.6.19.2.orig/arch/i386/kernel/ldt.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/ldt.c 2007-01-12 23:27:21.000000000 +0000 @@ -20,6 +20,9 @@ #include #include +const struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 } }; + #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) { @@ -103,6 +106,22 @@ retval = copy_ldt(&mm->context, &old_mm->context); up(&old_mm->context.sem); } + + if (tsk == current) { + mm->context.vdso = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + mm->context.user_cs_base = 0UL; + mm->context.user_cs_limit = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpus_clear(mm->context.cpu_user_cs_mask); +#endif + +#endif + + } + return retval; } @@ -160,11 +179,11 @@ { int err; unsigned long size; - void *address; + const void *address; err = 0; address = &default_ldt[0]; - size = 5*sizeof(struct desc_struct); + size = sizeof default_ldt; if (size > bytecount) size = bytecount; @@ -215,6 +234,13 @@ } } +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & MODIFY_LDT_CONTENTS_CODE)) { + error = -EINVAL; + goto out_unlock; + } +#endif + entry_1 = LDT_entry_a(&ldt_info); entry_2 = LDT_entry_b(&ldt_info); if (oldmode) diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/module.c linux-2.6.19.2/arch/i386/kernel/module.c --- linux-2.6.19.2.orig/arch/i386/kernel/module.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/module.c 2007-01-12 23:27:21.000000000 +0000 @@ -21,6 +21,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -32,9 +33,30 @@ { if (size == 0) return NULL; + +#ifdef CONFIG_PAX_KERNEXEC + return vmalloc(size); +#else return vmalloc_exec(size); +#endif + } +#ifdef CONFIG_PAX_KERNEXEC +void *module_alloc_exec(unsigned long size) +{ + struct vm_struct *area; + + if (size == 0) + return NULL; + + area = __get_vm_area(size, 0, (unsigned long)&MODULES_VADDR, (unsigned long)&MODULES_END); + if (area) + return area->addr; + + return NULL; +} +#endif /* Free memory returned from module_alloc */ void module_free(struct module *mod, void *module_region) @@ -44,6 +66,45 @@ table entries. */ } +#ifdef CONFIG_PAX_KERNEXEC +void module_free_exec(struct module *mod, void *module_region) +{ + struct vm_struct **p, *tmp; + + if (!module_region) + return; + + if ((PAGE_SIZE-1) & (unsigned long)module_region) { + printk(KERN_ERR "Trying to module_free_exec() bad address (%p)\n", module_region); + WARN_ON(1); + return; + } + + write_lock(&vmlist_lock); + for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) + if (tmp->addr == module_region) + break; + + if (tmp) { + unsigned long cr0; + + pax_open_kernel(cr0); + memset(tmp->addr, 0xCC, tmp->size); + pax_close_kernel(cr0); + + *p = tmp->next; + kfree(tmp); + } + write_unlock(&vmlist_lock); + + if (!tmp) { + printk(KERN_ERR "Trying to module_free_exec() nonexistent vm area (%p)\n", + module_region); + WARN_ON(1); + } +} +#endif + /* We don't need anything special. */ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -62,14 +123,16 @@ unsigned int i; Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; Elf32_Sym *sym; - uint32_t *location; + uint32_t *plocation, location; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; + plocation = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; + location = (uint32_t)plocation; + if (sechdrs[sechdrs[relsec].sh_info].sh_flags & SHF_EXECINSTR) + plocation = (void *)plocation + __KERNEL_TEXT_OFFSET; /* This is the symbol it is referring to. Note that all undefined symbols have been resolved. */ sym = (Elf32_Sym *)sechdrs[symindex].sh_addr @@ -78,11 +141,11 @@ switch (ELF32_R_TYPE(rel[i].r_info)) { case R_386_32: /* We add the value into the location given */ - *location += sym->st_value; + *plocation += sym->st_value; break; case R_386_PC32: /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; + *plocation += sym->st_value - location; break; default: printk(KERN_ERR "module %s: Unknown relocation: %u\n", diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/process.c linux-2.6.19.2/arch/i386/kernel/process.c --- linux-2.6.19.2.orig/arch/i386/kernel/process.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/process.c 2007-01-12 23:27:21.000000000 +0000 @@ -69,7 +69,7 @@ */ unsigned long thread_saved_pc(struct task_struct *tsk) { - return ((unsigned long *)tsk->thread.esp)[3]; + return tsk->thread.eip; } /* @@ -309,7 +309,7 @@ printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode_vm(regs)) + if (user_mode(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s %.*s)\n", regs->eflags, print_tainted(), init_utsname()->release, @@ -349,8 +349,8 @@ regs.ebx = (unsigned long) fn; regs.edx = (unsigned long) arg; - regs.xds = __USER_DS; - regs.xes = __USER_DS; + regs.xds = __KERNEL_DS; + regs.xes = __KERNEL_DS; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS | get_kernel_rpl(); @@ -371,7 +371,7 @@ struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; @@ -392,6 +392,9 @@ { struct task_struct *tsk = current; + __asm__("mov %0,%%fs\n" + "mov %0,%%gs\n" + : : "r" (0) : "memory"); memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); clear_tsk_thread_flag(tsk, TIF_DEBUG); @@ -425,7 +428,7 @@ struct task_struct *tsk; int err; - childregs = task_pt_regs(p); + childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 8; *childregs = *regs; childregs->eax = 0; childregs->esp = esp; @@ -468,6 +471,11 @@ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) goto out; +#ifdef CONFIG_PAX_SEGMEXEC + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + goto out; +#endif + desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; desc->a = LDT_entry_a(&info); desc->b = LDT_entry_b(&info); @@ -647,7 +655,11 @@ struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -670,11 +682,24 @@ savesegment(fs, prev->fs); savesegment(gs, prev->gs); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!segment_eq(prev_p->thread_info->addr_limit, next_p->thread_info->addr_limit)) + __set_fs(next_p->thread_info->addr_limit, cpu); +#endif + /* * Load the per-thread Thread-Local Storage descriptor. */ load_TLS(next, cpu); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + /* * Restore %fs and %gs if needed. * @@ -819,8 +844,18 @@ struct desc_struct *desc; int cpu, idx; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; + +#ifdef CONFIG_PAX_SEGMEXEC + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + idx = info.entry_number; /* @@ -852,8 +887,17 @@ desc->a = LDT_entry_a(&info); desc->b = LDT_entry_b(&info); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + load_TLS(t, cpu); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); return 0; @@ -909,9 +953,27 @@ return 0; } -unsigned long arch_align_stack(unsigned long sp) +#ifdef CONFIG_PAX_RANDKSTACK +asmlinkage void pax_randomize_kstack(void) { - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; + struct tss_struct *tss = init_tss + smp_processor_id(); + unsigned long time; + + if (!randomize_va_space) + return; + + rdtscl(time); + + /* P4 seems to return a 0 LSB, ignore it */ +#ifdef CONFIG_MPENTIUM4 + time &= 0x1EUL; + time <<= 2; +#else + time &= 0xFUL; + time <<= 3; +#endif + + tss->esp0 ^= time; + current->thread.esp0 = tss->esp0; } +#endif diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/ptrace.c linux-2.6.19.2/arch/i386/kernel/ptrace.c --- linux-2.6.19.2.orig/arch/i386/kernel/ptrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/ptrace.c 2007-01-12 23:27:21.000000000 +0000 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -169,15 +170,15 @@ * and APM bios ones we just ignore here. */ if (seg & LDT_SEGMENT) { - u32 *desc; + struct desc_struct *desc; unsigned long base; down(&child->mm->context.sem); - desc = child->mm->context.ldt + (seg & ~7); - base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000); + desc = &child->mm->context.ldt[seg >> 3]; + base = (desc->a >> 16) | ((desc->b & 0xff) << 16) | (desc->b & 0xff000000); /* 16-bit code segment? */ - if (!((desc[1] >> 22) & 1)) + if (!((desc->b >> 22) & 1)) addr &= 0xffff; addr += base; up(&child->mm->context.sem); @@ -342,6 +343,11 @@ if (copy_from_user(&info, user_desc, sizeof(info))) return -EFAULT; +#ifdef CONFIG_PAX_SEGMEXEC + if ((child->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; @@ -432,6 +438,17 @@ if(addr == (long) &dummy->u_debugreg[5]) break; if(addr < (long) &dummy->u_debugreg[4] && ((unsigned long) data) >= TASK_SIZE-3) break; + +#ifdef CONFIG_GRKERNSEC + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[3]){ + long reg = (addr - (long) &dummy->u_debugreg[0]) >> 2; + long type = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 4*reg)) & 3; + long align = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 2 + 4*reg)) & 3; + if((type & 1) && (data & align)) + break; + } +#endif /* Sanity-check data. Take one half-byte at once with * check = (val >> (16 + 4*i)) & 0xf. It contains the @@ -648,7 +665,7 @@ info.si_code = TRAP_BRKPT; /* User-mode eip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; + info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL; /* Send us the fakey SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/reboot.c linux-2.6.19.2/arch/i386/kernel/reboot.c --- linux-2.6.19.2.orig/arch/i386/kernel/reboot.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/reboot.c 2007-01-12 23:27:21.000000000 +0000 @@ -24,7 +24,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static int reboot_mode; +static unsigned short reboot_mode; static int reboot_thru_bios; #ifdef CONFIG_SMP @@ -137,15 +137,15 @@ doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. */ -static unsigned long long +static const unsigned long long real_mode_gdt_entries [3] = { 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct Xgt_desc_struct +static const struct Xgt_desc_struct real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, real_mode_idt = { 0x3ff, 0 }, no_idt = { 0, 0 }; @@ -170,7 +170,7 @@ More could be done here to set up the registers as if a CPU reset had occurred; hopefully real BIOSs don't assume much. */ -static unsigned char real_mode_switch [] = +static const unsigned char real_mode_switch [] = { 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ @@ -184,7 +184,7 @@ 0x24, 0x10, /* f: andb $0x10,al */ 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ }; -static unsigned char jump_to_bios [] = +static const unsigned char jump_to_bios [] = { 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ }; @@ -194,10 +194,14 @@ * specified by the code and length parameters. * We assume that length will aways be less that 100! */ -void machine_real_restart(unsigned char *code, int length) +void machine_real_restart(const unsigned char *code, unsigned int length) { unsigned long flags; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST @@ -218,8 +222,16 @@ from the kernel segment. This assumes the kernel segment starts at virtual address PAGE_OFFSET. */ - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + USER_PGD_PTRS >= KERNEL_PGD_PTRS ? KERNEL_PGD_PTRS : USER_PGD_PTRS); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif /* * Use `swapper_pg_dir' as our page directory. @@ -232,7 +244,7 @@ REBOOT.COM programs, and the previous reset routine did this too. */ - *((unsigned short *)0x472) = reboot_mode; + __put_user(reboot_mode, (unsigned short __user *)0x472); /* For the switch to real mode, copy some code to low memory. It has to be in the first 64k because it is running in 16-bit mode, and it @@ -240,9 +252,9 @@ off paging. Copy it near the end of the first page, out of the way of BIOS variables. */ - memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100), + flags = __copy_to_user ((void __user *) (0x1000 - sizeof (real_mode_switch) - 100), real_mode_switch, sizeof (real_mode_switch)); - memcpy ((void *) (0x1000 - 100), code, length); + flags = __copy_to_user ((void __user *) (0x1000 - 100), code, length); /* Set up the IDT for real mode. */ @@ -324,7 +336,7 @@ __asm__ __volatile__("int3"); } /* rebooting needs to touch the page at absolute addr 0 */ - *((unsigned short *)__va(0x472)) = reboot_mode; + __put_user(reboot_mode, (unsigned short __user *)0x472); for (;;) { mach_reboot_fixups(); /* for board specific fixups */ mach_reboot(); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/setup.c linux-2.6.19.2/arch/i386/kernel/setup.c --- linux-2.6.19.2.orig/arch/i386/kernel/setup.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/setup.c 2007-01-12 23:27:21.000000000 +0000 @@ -88,7 +88,11 @@ struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); +#ifdef CONFIG_X86_PAE +unsigned long mmu_cr4_features = X86_CR4_PAE; +#else unsigned long mmu_cr4_features; +#endif /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; @@ -1388,14 +1392,14 @@ if (!MOUNT_ROOT_RDONLY) root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; + init_mm.start_code = (unsigned long) _text + __KERNEL_TEXT_OFFSET; + init_mm.end_code = (unsigned long) _etext + __KERNEL_TEXT_OFFSET; init_mm.end_data = (unsigned long) _edata; init_mm.brk = init_pg_tables_end + PAGE_OFFSET; - code_resource.start = virt_to_phys(_text); - code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); + code_resource.start = virt_to_phys(_text + __KERNEL_TEXT_OFFSET); + code_resource.end = virt_to_phys(_etext + __KERNEL_TEXT_OFFSET)-1; + data_resource.start = virt_to_phys(_data); data_resource.end = virt_to_phys(_edata)-1; parse_early_param(); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/signal.c linux-2.6.19.2/arch/i386/kernel/signal.c --- linux-2.6.19.2.orig/arch/i386/kernel/signal.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/signal.c 2007-01-12 23:27:21.000000000 +0000 @@ -351,7 +351,7 @@ goto give_sigsegv; } - restorer = (void *)VDSO_SYM(&__kernel_sigreturn); + restorer = (void __user *)VDSO_SYM(&__kernel_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -447,7 +447,8 @@ goto give_sigsegv; /* Set up to return from userspace. */ - restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn); + + restorer = (void __user *)VDSO_SYM(&__kernel_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); @@ -580,7 +581,7 @@ * before reaching here, so testing against kernel * CS suffices. */ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) return; if (test_thread_flag(TIF_RESTORE_SIGMASK)) diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/smpboot.c linux-2.6.19.2/arch/i386/kernel/smpboot.c --- linux-2.6.19.2.orig/arch/i386/kernel/smpboot.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/smpboot.c 2007-01-12 23:27:21.000000000 +0000 @@ -1066,7 +1066,6 @@ struct warm_boot_cpu_info info; struct work_struct task; int apicid, ret; - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); apicid = x86_cpu_to_apicid[cpu]; if (apicid == BAD_APICID) { @@ -1078,13 +1077,7 @@ * the CPU isn't initialized at boot time, allocate gdt table here. * cpu_init will initialize it */ - if (!cpu_gdt_descr->address) { - cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL); - if (!cpu_gdt_descr->address) - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); - ret = -ENOMEM; - goto exit; - } + cpu_gdt_descr[cpu].address = get_cpu_gdt_table(cpu); info.complete = &done; info.apicid = apicid; @@ -1095,7 +1088,7 @@ /* init low mem mapping */ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + min(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); flush_tlb_all(); schedule_work(&task); wait_for_completion(&done); diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/sys_i386.c linux-2.6.19.2/arch/i386/kernel/sys_i386.c --- linux-2.6.19.2.orig/arch/i386/kernel/sys_i386.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/sys_i386.c 2007-01-12 23:27:21.000000000 +0000 @@ -100,6 +100,191 @@ return err; } +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (len > task_size) + return -ENOMEM; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + } + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE) && start_addr >= mm->mmap_base) { + start_addr = 0x00110000UL; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + start_addr += mm->delta_mmap & 0x03FFF000UL; +#endif + + if (mm->start_brk <= start_addr && start_addr < mm->mmap_base) + start_addr = addr = mm->mmap_base; + else + addr = start_addr; + } +#endif + +full_search: + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (task_size - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + if (mm->start_brk <= addr && addr < mm->mmap_base) { + start_addr = addr = mm->mmap_base; + goto full_search; + } + } +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + /* requested length too big for entire address space */ + if (len > task_size) + return -ENOMEM; + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE)) + goto bottomup; +#endif + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + + if (mm->mmap_base < len) + goto bottomup; + + addr = mm->mmap_base-len; + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (!vma || addr+len <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + } while (len < vma->vm_start); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->mmap_base = base; + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +} struct sel_arg_struct { unsigned long n; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/syscall_table.S linux-2.6.19.2/arch/i386/kernel/syscall_table.S --- linux-2.6.19.2.orig/arch/i386/kernel/syscall_table.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/syscall_table.S 2007-01-12 23:27:21.000000000 +0000 @@ -1,3 +1,4 @@ +.section .rodata,"a",@progbits ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/sysenter.c linux-2.6.19.2/arch/i386/kernel/sysenter.c --- linux-2.6.19.2.orig/arch/i386/kernel/sysenter.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/sysenter.c 2007-01-12 23:27:21.000000000 +0000 @@ -45,7 +45,7 @@ void enable_sep_cpu(void) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; if (!boot_cpu_has(X86_FEATURE_SEP)) { put_cpu(); @@ -125,16 +125,36 @@ unsigned long addr; int ret; +#ifdef CONFIG_PAX_SEGMEXEC + struct vm_area_struct *vma_m = NULL; +#endif + + vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + if (!vma) + return -ENOMEM; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) { + vma_m = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + if (!vma_m) { + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } + } +#endif + down_write(&mm->mmap_sem); - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, MAP_EXECUTABLE); if (IS_ERR_VALUE(addr)) { ret = addr; - goto up_fail; - } - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) { - ret = -ENOMEM; + kmem_cache_free(vm_area_cachep, vma); + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma_m) + kmem_cache_free(vm_area_cachep, vma_m); +#endif + goto up_fail; } @@ -142,18 +162,49 @@ vma->vm_end = addr + PAGE_SIZE; /* MAYWRITE to allow gdb to COW and set breakpoints */ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + +#ifdef CONFIG_PAX_MPROTECT + if (mm->pax_flags & MF_PAX_MPROTECT) + vma->vm_flags &= ~VM_MAYWRITE; +#endif + vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + vma->vm_page_prot = protection_map[vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; vma->vm_ops = &syscall_vm_ops; vma->vm_mm = mm; ret = insert_vm_struct(mm, vma); if (unlikely(ret)) { kmem_cache_free(vm_area_cachep, vma); + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma_m) + kmem_cache_free(vm_area_cachep, vma_m); +#endif + goto up_fail; } - current->mm->context.vdso = (void *)addr; +#ifdef CONFIG_PAX_SEGMEXEC + if (vma_m) { + *vma_m = *vma; + vma_m->vm_start += SEGMEXEC_TASK_SIZE; + vma_m->vm_end += SEGMEXEC_TASK_SIZE; + ret = insert_vm_struct(mm, vma_m); + if (unlikely(ret)) { + kmem_cache_free(vm_area_cachep, vma_m); + goto up_fail; + } + vma_m->vm_flags |= VM_MIRROR; + vma->vm_flags |= VM_MIRROR; + vma_m->vm_mirror = vma->vm_start - vma_m->vm_start; + vma->vm_mirror = vma_m->vm_start - vma->vm_start; + vma_m->vm_pgoff = vma->vm_pgoff; + mm->total_vm++; + } +#endif + + current->mm->context.vdso = addr; current_thread_info()->sysenter_return = (void *)VDSO_SYM(&SYSENTER_RETURN); mm->total_vm++; @@ -164,8 +215,17 @@ const char *arch_vma_name(struct vm_area_struct *vma) { - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + if (vma->vm_start == vma->vm_mm->context.vdso) return "[vdso]"; + +#ifdef CONFIG_PAX_SEGMEXEC + if (!(vma->vm_mm->pax_flags & MF_PAX_SEGMEXEC) || !(vma->vm_flags & VM_MIRROR)) + return NULL; + + if (vma->vm_start + vma->vm_mirror == vma->vm_mm->context.vdso) + return "[vdso]"; +#endif + return NULL; } diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/time.c linux-2.6.19.2/arch/i386/kernel/time.c --- linux-2.6.19.2.orig/arch/i386/kernel/time.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/time.c 2007-01-12 23:27:21.000000000 +0000 @@ -133,7 +133,7 @@ unsigned long pc = instruction_pointer(regs); #ifdef CONFIG_SMP - if (!user_mode_vm(regs) && in_lock_functions(pc)) { + if (!user_mode(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->ebp + 4); #else @@ -345,7 +345,7 @@ .cls = &timer_sysclass, }; -static int time_init_device(void) +static int __init time_init_device(void) { int error = sysdev_class_register(&timer_sysclass); if (!error) diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/traps.c linux-2.6.19.2/arch/i386/kernel/traps.c --- linux-2.6.19.2.orig/arch/i386/kernel/traps.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/traps.c 2007-01-12 23:27:21.000000000 +0000 @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -61,18 +62,10 @@ asmlinkage int system_call(void); -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 } }; - /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. - */ -struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +extern struct desc_struct idt_table[256]; asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -146,7 +139,7 @@ #else while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; - if (__kernel_text_address(addr)) + if (__kernel_text_address(addr + __KERNEL_TEXT_OFFSET)) ops->address(data, addr); } #endif @@ -351,7 +344,7 @@ esp = (unsigned long) (®s->esp); savesegment(ss, ss); - if (user_mode_vm(regs)) { + if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -382,13 +375,15 @@ u8 __user *eip; int code_bytes = 64; unsigned char c; + mm_segment_t old_fs = get_fs(); printk("\n" KERN_EMERG "Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); printk(KERN_EMERG "Code: "); - eip = (u8 __user *)regs->eip - 43; + set_fs(KERNEL_DS); + eip = (u8 __user *)regs->eip - 43 + __KERNEL_TEXT_OFFSET; if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { /* try starting at EIP */ eip = (u8 __user *)regs->eip; @@ -399,26 +394,29 @@ printk(" Bad EIP value."); break; } - if (eip == (u8 __user *)regs->eip) + if (eip == (u8 __user *)regs->eip + __KERNEL_TEXT_OFFSET) printk("<%02x> ", c); else printk("%02x ", c); } + set_fs(old_fs); } printk("\n"); } static void handle_BUG(struct pt_regs *regs) { - unsigned long eip = regs->eip; + unsigned long eip = regs->eip + __KERNEL_TEXT_OFFSET; unsigned short ud2; + mm_segment_t old_fs = get_fs(); + set_fs(KERNEL_DS); if (eip < PAGE_OFFSET) - return; + goto out; if (probe_kernel_address((unsigned short __user *)eip, ud2)) - return; + goto out; if (ud2 != 0x0b0f) - return; + goto out; printk(KERN_EMERG "------------[ cut here ]------------\n"); @@ -428,18 +426,21 @@ char *file; char c; - if (probe_kernel_address((unsigned short __user *)(eip + 2), - line)) + if (probe_kernel_address((unsigned short __user *)(eip + 7), line)) break; - if (__get_user(file, (char * __user *)(eip + 4)) || - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) + if (probe_kernel_address((char * __user *)(eip + 3), file) || + file < _text + __KERNEL_TEXT_OFFSET) + break; + if (probe_kernel_address(file, c)) file = ""; - printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); - return; + goto out; } while (0); #endif printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n"); + +out: + set_fs(old_fs); } /* This is gone through when something in the kernel @@ -538,7 +539,7 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { - if (!user_mode_vm(regs)) + if (!user_mode(regs)) die(str, regs, err); } @@ -556,7 +557,7 @@ goto trap_signal; } - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto kernel_trap; trap_signal: { @@ -644,7 +645,7 @@ long error_code) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = &init_tss[cpu]; struct thread_struct *thread = ¤t->thread; /* @@ -680,9 +681,25 @@ if (regs->eflags & VM_MASK) goto gp_in_vm86; - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto gp_in_kernel; +#ifdef CONFIG_PAX_PAGEEXEC + if (current->mm && (current->mm->pax_flags & MF_PAX_PAGEEXEC)) { + struct mm_struct *mm = current->mm; + unsigned long limit; + + down_write(&mm->mmap_sem); + limit = mm->context.user_cs_limit; + if (limit < TASK_SIZE) { + track_exec_limit(mm, limit, TASK_SIZE, PROT_EXEC); + up_write(&mm->mmap_sem); + return; + } + up_write(&mm->mmap_sem); + } +#endif + current->thread.error_code = error_code; current->thread.trap_no = 13; force_sig(SIGSEGV, current); @@ -698,6 +715,13 @@ if (notify_die(DIE_GPF, "general protection fault", regs, error_code, 13, SIGSEGV) == NOTIFY_STOP) return; + +#ifdef CONFIG_PAX_KERNEXEC + if ((regs->xcs & 0xFFFF) == __KERNEL_CS) + die("PAX: suspicious general protection fault", regs, error_code); + else +#endif + die("general protection fault", regs, error_code); } } @@ -781,7 +805,7 @@ /* If we are in kernel we are probably nested up pretty bad * and might aswell get out now while we still can. */ - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { current->thread.trap_no = 2; crash_kexec(regs); } @@ -913,7 +937,7 @@ * check for kernel mode by just checking the CPL * of CS. */ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto clear_TF_reenable; } @@ -1189,7 +1213,19 @@ */ void set_intr_gate(unsigned int n, void *addr) { + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + _set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } /* diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/vm86.c linux-2.6.19.2/arch/i386/kernel/vm86.c --- linux-2.6.19.2.orig/arch/i386/kernel/vm86.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/vm86.c 2007-01-12 23:27:21.000000000 +0000 @@ -122,7 +122,7 @@ do_exit(SIGSEGV); } - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); current->thread.esp0 = current->thread.saved_esp0; current->thread.sysenter_cs = __KERNEL_CS; load_esp0(tss, ¤t->thread); @@ -296,7 +296,7 @@ savesegment(fs, tsk->thread.saved_fs); savesegment(gs, tsk->thread.saved_gs); - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; diff -Naur linux-2.6.19.2.orig/arch/i386/kernel/vmlinux.lds.S linux-2.6.19.2/arch/i386/kernel/vmlinux.lds.S --- linux-2.6.19.2.orig/arch/i386/kernel/vmlinux.lds.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/kernel/vmlinux.lds.S 2007-01-12 23:27:21.000000000 +0000 @@ -8,6 +8,13 @@ #include #include #include +#include + +#ifdef CONFIG_X86_PAE +#define PMD_SHIFT 21 +#else +#define PMD_SHIFT 22 +#endif OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -16,31 +23,133 @@ PHDRS { text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ + data PT_LOAD FLAGS(6); /* RW_ */ note PT_NOTE FLAGS(4); /* R__ */ } SECTIONS { . = __KERNEL_START; - phys_startup_32 = startup_32 - LOAD_OFFSET; + phys_startup_32 = startup_32 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; + + .text.startup : AT(ADDR(.text.startup) - LOAD_OFFSET) { + BYTE(0xEA) /* jmp far */ + LONG(phys_startup_32) + SHORT(__BOOT_CS) + } :text = 0x9090 + + /* might get freed after init */ + . = ALIGN(4096); + __smp_alt_begin = .; + __smp_alt_instructions = .; + .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { + *(.smp_altinstructions) + } + __smp_alt_instructions_end = .; + . = ALIGN(4); + __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + *(.smp_locks) + } + __smp_locks_end = .; + .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { + *(.smp_altinstr_replacement) + } + . = ALIGN(4096); + __smp_alt_end = .; + + /* will be freed after init */ + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } + . = ALIGN(16); + __setup_start = .; + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } + __setup_end = .; + __initcall_start = .; + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + INITCALLS + } + __initcall_end = .; + __con_initcall_start = .; + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + *(.con_initcall.init) + } + __con_initcall_end = .; + SECURITY_INIT + . = ALIGN(4); + __alt_instructions = .; + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + *(.altinstructions) + } + __alt_instructions_end = .; + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } + + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } + . = ALIGN(4096); + __initramfs_start = .; + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } + __initramfs_end = .; + . = ALIGN(L1_CACHE_BYTES); + __per_cpu_start = .; + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } + __per_cpu_end = .; + /* read-only */ + + . = ALIGN(4096); + .init.text (. - __KERNEL_TEXT_OFFSET) : AT(ADDR(.init.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + /* .exit.text is discard at runtime, not link time, to deal with references + from .altinstructions and .eh_frame */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { *(.exit.text) } + +#ifdef CONFIG_PAX_KERNEXEC + .text.align : AT(ADDR(.text.align) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + . = ALIGN(__KERNEL_TEXT_OFFSET - LOAD_OFFSET) - 1; + BYTE(0) + } +#else + . = ALIGN(4096); +#endif + + __init_end = . + __KERNEL_TEXT_OFFSET; + /* freed after init ends here */ + _text = .; /* Text and read-only data */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { + .text : AT(ADDR(.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { *(.text) SCHED_TEXT LOCK_TEXT KPROBES_TEXT *(.fixup) *(.gnu.warning) - } :text = 0x9090 + } _etext = .; /* End of text section */ - + . += __KERNEL_TEXT_OFFSET; . = ALIGN(16); /* Exception table */ __start___ex_table = .; __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } __stop___ex_table = .; + . = ALIGN(4096); + .rodata.page_aligned : AT(ADDR(.rodata.page_aligned) - LOAD_OFFSET) { + *(.empty_zero_page) + +#ifdef CONFIG_X86_PAE + *(.swapper_pm_dir) +#endif + + *(.swapper_pg_dir) + *(.idt) + } + RODATA . = ALIGN(4); @@ -50,9 +159,25 @@ } __tracedata_end = .; +#ifdef CONFIG_PAX_KERNEXEC + . = ALIGN(4096); + MODULES_VADDR = .; + + .module.text : AT(ADDR(.module.text) - LOAD_OFFSET) { + . += (4 * 1024 * 1024); + . = ALIGN(1 << PMD_SHIFT) - 1; + BYTE(0) + } + + MODULES_END = .; +#else + . = ALIGN(32); +#endif + /* writeable */ . = ALIGN(4096); .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + _data = .; *(.data) CONSTRUCTORS } :data @@ -63,11 +188,6 @@ . = ALIGN(4096); __nosave_end = .; - . = ALIGN(4096); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.idt) - } - . = ALIGN(32); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) @@ -76,7 +196,6 @@ /* rarely changed data like cpu maps */ . = ALIGN(32); .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } - _edata = .; /* End of data section */ #ifdef CONFIG_STACK_UNWIND . = ALIGN(4); @@ -92,75 +211,9 @@ *(.data.init_task) } - /* might get freed after init */ - . = ALIGN(4096); - __smp_alt_begin = .; - __smp_alt_instructions = .; - .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { - *(.smp_altinstructions) - } - __smp_alt_instructions_end = .; - . = ALIGN(4); - __smp_locks = .; - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - *(.smp_locks) - } - __smp_locks_end = .; - .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { - *(.smp_altinstr_replacement) - } - . = ALIGN(4096); - __smp_alt_end = .; + _edata = .; /* End of data section */ - /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - *(.init.text) - _einittext = .; - } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } - . = ALIGN(16); - __setup_start = .; - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } - __setup_end = .; - __initcall_start = .; - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - INITCALLS - } - __initcall_end = .; - __con_initcall_start = .; - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - *(.con_initcall.init) - } - __con_initcall_end = .; - SECURITY_INIT - . = ALIGN(4); - __alt_instructions = .; - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - *(.altinstructions) - } - __alt_instructions_end = .; - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } - . = ALIGN(4096); - __initramfs_start = .; - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } - __initramfs_end = .; - . = ALIGN(L1_CACHE_BYTES); - __per_cpu_start = .; - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } - __per_cpu_end = .; . = ALIGN(4096); - __init_end = .; - /* freed after init ends here */ - __bss_start = .; /* BSS */ .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) { *(.bss.page_aligned) diff -Naur linux-2.6.19.2.orig/arch/i386/lib/checksum.S linux-2.6.19.2/arch/i386/lib/checksum.S --- linux-2.6.19.2.orig/arch/i386/lib/checksum.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/checksum.S 2007-01-12 23:27:21.000000000 +0000 @@ -26,7 +26,8 @@ */ #include - +#include + /* * computes a partial checksum, e.g. for TCP/UDP fragments */ @@ -280,12 +281,23 @@ .align 4 .globl csum_partial_copy_generic - +.globl csum_partial_copy_generic_to_user +.globl csum_partial_copy_generic_from_user + #ifndef CONFIG_X86_USE_PPRO_CHECKSUM #define ARGBASE 16 #define FP 12 - + +csum_partial_copy_generic_to_user: + pushl $(__USER_DS) + popl %es + jmp csum_partial_copy_generic + +csum_partial_copy_generic_from_user: + pushl $(__USER_DS) + popl %ds + csum_partial_copy_generic: subl $4,%esp pushl %edi @@ -304,7 +316,7 @@ jmp 4f SRC(1: movw (%esi), %bx ) addl $2, %esi -DST( movw %bx, (%edi) ) +DST( movw %bx, %es:(%edi) ) addl $2, %edi addw %bx, %ax adcl $0, %eax @@ -316,30 +328,30 @@ SRC(1: movl (%esi), %ebx ) SRC( movl 4(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) adcl %edx, %eax -DST( movl %edx, 4(%edi) ) +DST( movl %edx, %es:4(%edi) ) SRC( movl 8(%esi), %ebx ) SRC( movl 12(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 8(%edi) ) +DST( movl %ebx, %es:8(%edi) ) adcl %edx, %eax -DST( movl %edx, 12(%edi) ) +DST( movl %edx, %es:12(%edi) ) SRC( movl 16(%esi), %ebx ) SRC( movl 20(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 16(%edi) ) +DST( movl %ebx, %es:16(%edi) ) adcl %edx, %eax -DST( movl %edx, 20(%edi) ) +DST( movl %edx, %es:20(%edi) ) SRC( movl 24(%esi), %ebx ) SRC( movl 28(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 24(%edi) ) +DST( movl %ebx, %es:24(%edi) ) adcl %edx, %eax -DST( movl %edx, 28(%edi) ) +DST( movl %edx, %es:28(%edi) ) lea 32(%esi), %esi lea 32(%edi), %edi @@ -353,7 +365,7 @@ shrl $2, %edx # This clears CF SRC(3: movl (%esi), %ebx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) lea 4(%esi), %esi lea 4(%edi), %edi dec %edx @@ -365,12 +377,12 @@ jb 5f SRC( movw (%esi), %cx ) leal 2(%esi), %esi -DST( movw %cx, (%edi) ) +DST( movw %cx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%ecx SRC(5: movb (%esi), %cl ) -DST( movb %cl, (%edi) ) +DST( movb %cl, %es:(%edi) ) 6: addl %ecx, %eax adcl $0, %eax 7: @@ -381,7 +393,7 @@ 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination - computing the rest # is too much work @@ -394,11 +406,15 @@ 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT,(%ebx) + movl $-EFAULT,%ss:(%ebx) jmp 5000b .previous + pushl %ss + popl %ds + pushl %ss + popl %es popl %ebx popl %esi popl %edi @@ -410,17 +426,28 @@ /* Version for PentiumII/PPro */ #define ROUND1(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ addl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)); #define ROUND(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ adcl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)); #define ARGBASE 12 - + +csum_partial_copy_generic_to_user: + pushl $(__USER_DS) + popl %es + jmp csum_partial_copy_generic + +csum_partial_copy_generic_from_user: + pushl $(__USER_DS) + popl %ds + csum_partial_copy_generic: pushl %ebx pushl %edi @@ -439,7 +466,7 @@ subl %ebx, %edi lea -1(%esi),%edx andl $-32,%edx - lea 3f(%ebx,%ebx), %ebx + lea 3f(%ebx,%ebx,2), %ebx testl %esi, %esi jmp *%ebx 1: addl $64,%esi @@ -460,19 +487,19 @@ jb 5f SRC( movw (%esi), %dx ) leal 2(%esi), %esi -DST( movw %dx, (%edi) ) +DST( movw %dx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%edx 5: SRC( movb (%esi), %dl ) -DST( movb %dl, (%edi) ) +DST( movb %dl, %es:(%edi) ) 6: addl %edx, %eax adcl $0, %eax 7: .section .fixup, "ax" 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination (computing the rest is too much work) movl ARGBASE+8(%esp),%edi # dst movl ARGBASE+12(%esp),%ecx # len @@ -480,10 +507,14 @@ rep; stosb jmp 7b 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) jmp 7b .previous + pushl %ss + popl %ds + pushl %ss + popl %es popl %esi popl %edi popl %ebx diff -Naur linux-2.6.19.2.orig/arch/i386/lib/getuser.S linux-2.6.19.2/arch/i386/lib/getuser.S --- linux-2.6.19.2.orig/arch/i386/lib/getuser.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/getuser.S 2007-01-12 23:27:21.000000000 +0000 @@ -9,6 +9,7 @@ * return value. */ #include +#include /* @@ -30,8 +31,12 @@ GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 1: movzbl (%eax),%edx xorl %eax,%eax + pushl %ss + pop %ds ret .align 4 @@ -42,7 +47,11 @@ GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 2: movzwl -1(%eax),%edx + pushl %ss + pop %ds xorl %eax,%eax ret @@ -54,11 +63,17 @@ GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 3: movl -3(%eax),%edx + pushl %ss + pop %ds xorl %eax,%eax ret bad_get_user: + pushl %ss + pop %ds xorl %edx,%edx movl $-14,%eax ret diff -Naur linux-2.6.19.2.orig/arch/i386/lib/mmx.c linux-2.6.19.2/arch/i386/lib/mmx.c --- linux-2.6.19.2.orig/arch/i386/lib/mmx.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/mmx.c 2007-01-12 23:27:21.000000000 +0000 @@ -47,14 +47,30 @@ " prefetch 256(%0)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : : "r" (from) : "ax"); for(; i>5; i--) @@ -78,14 +94,30 @@ " movq %%mm2, 48(%1)\n" " movq %%mm3, 56(%1)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } @@ -178,14 +210,30 @@ " prefetch 256(%0)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : : "r" (from) : "ax"); for(i=0; i<(4096-320)/64; i++) { @@ -208,14 +256,30 @@ " movq 56(%0), %%mm7\n" " movntq %%mm7, 56(%1)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } @@ -308,14 +372,30 @@ " prefetch 256(%0)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : : "r" (from) : "ax"); for(i=0; i<4096/64; i++) { @@ -338,14 +418,30 @@ " movq %%mm2, 48(%1)\n" " movq %%mm3, 56(%1)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " cli\n" + " movl %%cr0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " orl $0x00010000, %%eax\n" + " movl %%eax, %%cr0\n" + " sti\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } diff -Naur linux-2.6.19.2.orig/arch/i386/lib/putuser.S linux-2.6.19.2/arch/i386/lib/putuser.S --- linux-2.6.19.2.orig/arch/i386/lib/putuser.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/putuser.S 2007-01-12 23:27:21.000000000 +0000 @@ -9,6 +9,7 @@ * return value. */ #include +#include /* @@ -33,7 +34,11 @@ ENTER cmpl TI_addr_limit(%ebx),%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 1: movb %al,(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT @@ -45,7 +50,11 @@ subl $1,%ebx cmpl %ebx,%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 2: movw %ax,(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT @@ -57,7 +66,11 @@ subl $3,%ebx cmpl %ebx,%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 3: movl %eax,(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT @@ -69,12 +82,18 @@ subl $7,%ebx cmpl %ebx,%ecx jae bad_put_user + pushl $(__USER_DS) + popl %ds 4: movl %eax,(%ecx) 5: movl %edx,4(%ecx) + pushl %ss + popl %ds xorl %eax,%eax EXIT bad_put_user: + pushl %ss + popl %ds movl $-14,%eax EXIT diff -Naur linux-2.6.19.2.orig/arch/i386/lib/usercopy.c linux-2.6.19.2/arch/i386/lib/usercopy.c --- linux-2.6.19.2.orig/arch/i386/lib/usercopy.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/lib/usercopy.c 2007-01-12 23:27:21.000000000 +0000 @@ -28,34 +28,41 @@ * Copy a null terminated string from userspace. */ -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - int __d0, __d1, __d2; \ - might_sleep(); \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - ".previous" \ - : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) +static long __do_strncpy_from_user(char *dst, const char __user *src, long count) +{ + int __d0, __d1, __d2; + long res = -EFAULT; + + might_sleep(); + __asm__ __volatile__( + " movw %w10,%%ds\n" + " testl %1,%1\n" + " jz 2f\n" + "0: lodsb\n" + " stosb\n" + " testb %%al,%%al\n" + " jz 1f\n" + " decl %1\n" + " jnz 0b\n" + "1: subl %1,%0\n" + "2:\n" + " pushl %%ss\n" + " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "3: movl %5,%0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,3b\n" + ".previous" + : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), + "=&D" (__d2) + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst), + "r"(__USER_DS) + : "memory"); + return res; +} /** * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. @@ -80,9 +87,7 @@ long __strncpy_from_user(char *dst, const char __user *src, long count) { - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; + return __do_strncpy_from_user(dst, src, count); } EXPORT_SYMBOL(__strncpy_from_user); @@ -109,7 +114,7 @@ { long res = -EFAULT; if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); + res = __do_strncpy_from_user(dst, src, count); return res; } EXPORT_SYMBOL(strncpy_from_user); @@ -118,27 +123,33 @@ * Zero Userspace */ -#define __do_clear_user(addr,size) \ -do { \ - int __d0; \ - might_sleep(); \ - __asm__ __volatile__( \ - "0: rep; stosl\n" \ - " movl %2,%0\n" \ - "1: rep; stosb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%2,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0) \ - : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ -} while (0) +static unsigned long __do_clear_user(void __user *addr, unsigned long size) +{ + int __d0; + + might_sleep(); + __asm__ __volatile__( + " movw %w6,%%es\n" + "0: rep; stosl\n" + " movl %2,%0\n" + "1: rep; stosb\n" + "2:\n" + " pushl %%ss\n" + " popl %%es\n" + ".section .fixup,\"ax\"\n" + "3: lea 0(%2,%0,4),%0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,3b\n" + " .long 1b,2b\n" + ".previous" + : "=&c"(size), "=&D" (__d0) + : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0), + "r"(__USER_DS)); + return size; +} /** * clear_user: - Zero a block of memory in user space. @@ -155,7 +166,7 @@ { might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) - __do_clear_user(to, n); + n = __do_clear_user(to, n); return n; } EXPORT_SYMBOL(clear_user); @@ -174,8 +185,7 @@ unsigned long __clear_user(void __user *to, unsigned long n) { - __do_clear_user(to, n); - return n; + return __do_clear_user(to, n); } EXPORT_SYMBOL(__clear_user); @@ -198,14 +208,17 @@ might_sleep(); __asm__ __volatile__( + " movw %w8,%%es\n" " testl %0, %0\n" " jz 3f\n" - " andl %0,%%ecx\n" + " movl %0,%%ecx\n" "0: repne; scasb\n" " setne %%al\n" " subl %%ecx,%0\n" " addl %0,%%eax\n" "1:\n" + " pushl %%ss\n" + " popl %%es\n" ".section .fixup,\"ax\"\n" "2: xorl %%eax,%%eax\n" " jmp 1b\n" @@ -217,7 +230,7 @@ " .long 0b,2b\n" ".previous" :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) - :"0" (n), "1" (s), "2" (0), "3" (mask) + :"0" (n), "1" (s), "2" (0), "3" (mask), "r" (__USER_DS) :"cc"); return res & mask; } @@ -225,10 +238,121 @@ #ifdef CONFIG_X86_INTEL_USERCOPY static unsigned long -__copy_user_intel(void __user *to, const void *from, unsigned long size) +__generic_copy_to_user_intel(void __user *to, const void *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " movw %w6, %%es\n" + " .align 2,0x90\n" + "1: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 3f\n" + "2: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "3: movl 0(%4), %%eax\n" + "4: movl 4(%4), %%edx\n" + "5: movl %%eax, %%es:0(%3)\n" + "6: movl %%edx, %%es:4(%3)\n" + "7: movl 8(%4), %%eax\n" + "8: movl 12(%4),%%edx\n" + "9: movl %%eax, %%es:8(%3)\n" + "10: movl %%edx, %%es:12(%3)\n" + "11: movl 16(%4), %%eax\n" + "12: movl 20(%4), %%edx\n" + "13: movl %%eax, %%es:16(%3)\n" + "14: movl %%edx, %%es:20(%3)\n" + "15: movl 24(%4), %%eax\n" + "16: movl 28(%4), %%edx\n" + "17: movl %%eax, %%es:24(%3)\n" + "18: movl %%edx, %%es:28(%3)\n" + "19: movl 32(%4), %%eax\n" + "20: movl 36(%4), %%edx\n" + "21: movl %%eax, %%es:32(%3)\n" + "22: movl %%edx, %%es:36(%3)\n" + "23: movl 40(%4), %%eax\n" + "24: movl 44(%4), %%edx\n" + "25: movl %%eax, %%es:40(%3)\n" + "26: movl %%edx, %%es:44(%3)\n" + "27: movl 48(%4), %%eax\n" + "28: movl 52(%4), %%edx\n" + "29: movl %%eax, %%es:48(%3)\n" + "30: movl %%edx, %%es:52(%3)\n" + "31: movl 56(%4), %%eax\n" + "32: movl 60(%4), %%edx\n" + "33: movl %%eax, %%es:56(%3)\n" + "34: movl %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 1b\n" + "35: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "99: rep; movsl\n" + "36: movl %%eax, %0\n" + "37: rep; movsb\n" + "100:\n" + " pushl %%ss\n" + " popl %%es\n" + ".section .fixup,\"ax\"\n" + "101: lea 0(%%eax,%0,4),%0\n" + " jmp 100b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b,100b\n" + " .long 2b,100b\n" + " .long 3b,100b\n" + " .long 4b,100b\n" + " .long 5b,100b\n" + " .long 6b,100b\n" + " .long 7b,100b\n" + " .long 8b,100b\n" + " .long 9b,100b\n" + " .long 10b,100b\n" + " .long 11b,100b\n" + " .long 12b,100b\n" + " .long 13b,100b\n" + " .long 14b,100b\n" + " .long 15b,100b\n" + " .long 16b,100b\n" + " .long 17b,100b\n" + " .long 18b,100b\n" + " .long 19b,100b\n" + " .long 20b,100b\n" + " .long 21b,100b\n" + " .long 22b,100b\n" + " .long 23b,100b\n" + " .long 24b,100b\n" + " .long 25b,100b\n" + " .long 26b,100b\n" + " .long 27b,100b\n" + " .long 28b,100b\n" + " .long 29b,100b\n" + " .long 30b,100b\n" + " .long 31b,100b\n" + " .long 32b,100b\n" + " .long 33b,100b\n" + " .long 34b,100b\n" + " .long 35b,100b\n" + " .long 36b,100b\n" + " .long 37b,100b\n" + " .long 99b,101b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; +} + +static unsigned long +__generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size) { int d0, d1; __asm__ __volatile__( + " movw %w6, %%ds\n" " .align 2,0x90\n" "1: movl 32(%4), %%eax\n" " cmpl $67, %0\n" @@ -237,36 +361,36 @@ " .align 2,0x90\n" "3: movl 0(%4), %%eax\n" "4: movl 4(%4), %%edx\n" - "5: movl %%eax, 0(%3)\n" - "6: movl %%edx, 4(%3)\n" + "5: movl %%eax, %%es:0(%3)\n" + "6: movl %%edx, %%es:4(%3)\n" "7: movl 8(%4), %%eax\n" "8: movl 12(%4),%%edx\n" - "9: movl %%eax, 8(%3)\n" - "10: movl %%edx, 12(%3)\n" + "9: movl %%eax, %%es:8(%3)\n" + "10: movl %%edx, %%es:12(%3)\n" "11: movl 16(%4), %%eax\n" "12: movl 20(%4), %%edx\n" - "13: movl %%eax, 16(%3)\n" - "14: movl %%edx, 20(%3)\n" + "13: movl %%eax, %%es:16(%3)\n" + "14: movl %%edx, %%es:20(%3)\n" "15: movl 24(%4), %%eax\n" "16: movl 28(%4), %%edx\n" - "17: movl %%eax, 24(%3)\n" - "18: movl %%edx, 28(%3)\n" + "17: movl %%eax, %%es:24(%3)\n" + "18: movl %%edx, %%es:28(%3)\n" "19: movl 32(%4), %%eax\n" "20: movl 36(%4), %%edx\n" - "21: movl %%eax, 32(%3)\n" - "22: movl %%edx, 36(%3)\n" + "21: movl %%eax, %%es:32(%3)\n" + "22: movl %%edx, %%es:36(%3)\n" "23: movl 40(%4), %%eax\n" "24: movl 44(%4), %%edx\n" - "25: movl %%eax, 40(%3)\n" - "26: movl %%edx, 44(%3)\n" + "25: movl %%eax, %%es:40(%3)\n" + "26: movl %%edx, %%es:44(%3)\n" "27: movl 48(%4), %%eax\n" "28: movl 52(%4), %%edx\n" - "29: movl %%eax, 48(%3)\n" - "30: movl %%edx, 52(%3)\n" + "29: movl %%eax, %%es:48(%3)\n" + "30: movl %%edx, %%es:52(%3)\n" "31: movl 56(%4), %%eax\n" "32: movl 60(%4), %%edx\n" - "33: movl %%eax, 56(%3)\n" - "34: movl %%edx, 60(%3)\n" + "33: movl %%eax, %%es:56(%3)\n" + "34: movl %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -280,6 +404,8 @@ "36: movl %%eax, %0\n" "37: rep; movsb\n" "100:\n" + " pushl %%ss\n" + " popl %%ds\n" ".section .fixup,\"ax\"\n" "101: lea 0(%%eax,%0,4),%0\n" " jmp 100b\n" @@ -326,7 +452,7 @@ " .long 99b,101b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -336,6 +462,7 @@ { int d0, d1; __asm__ __volatile__( + " movw %w6, %%ds\n" " .align 2,0x90\n" "0: movl 32(%4), %%eax\n" " cmpl $67, %0\n" @@ -344,36 +471,36 @@ " .align 2,0x90\n" "2: movl 0(%4), %%eax\n" "21: movl 4(%4), %%edx\n" - " movl %%eax, 0(%3)\n" - " movl %%edx, 4(%3)\n" + " movl %%eax, %%es:0(%3)\n" + " movl %%edx, %%es:4(%3)\n" "3: movl 8(%4), %%eax\n" "31: movl 12(%4),%%edx\n" - " movl %%eax, 8(%3)\n" - " movl %%edx, 12(%3)\n" + " movl %%eax, %%es:8(%3)\n" + " movl %%edx, %%es:12(%3)\n" "4: movl 16(%4), %%eax\n" "41: movl 20(%4), %%edx\n" - " movl %%eax, 16(%3)\n" - " movl %%edx, 20(%3)\n" + " movl %%eax, %%es:16(%3)\n" + " movl %%edx, %%es:20(%3)\n" "10: movl 24(%4), %%eax\n" "51: movl 28(%4), %%edx\n" - " movl %%eax, 24(%3)\n" - " movl %%edx, 28(%3)\n" + " movl %%eax, %%es:24(%3)\n" + " movl %%edx, %%es:28(%3)\n" "11: movl 32(%4), %%eax\n" "61: movl 36(%4), %%edx\n" - " movl %%eax, 32(%3)\n" - " movl %%edx, 36(%3)\n" + " movl %%eax, %%es:32(%3)\n" + " movl %%edx, %%es:36(%3)\n" "12: movl 40(%4), %%eax\n" "71: movl 44(%4), %%edx\n" - " movl %%eax, 40(%3)\n" - " movl %%edx, 44(%3)\n" + " movl %%eax, %%es:40(%3)\n" + " movl %%edx, %%es:44(%3)\n" "13: movl 48(%4), %%eax\n" "81: movl 52(%4), %%edx\n" - " movl %%eax, 48(%3)\n" - " movl %%edx, 52(%3)\n" + " movl %%eax, %%es:48(%3)\n" + " movl %%edx, %%es:52(%3)\n" "14: movl 56(%4), %%eax\n" "91: movl 60(%4), %%edx\n" - " movl %%eax, 56(%3)\n" - " movl %%edx, 60(%3)\n" + " movl %%eax, %%es:56(%3)\n" + " movl %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -387,6 +514,8 @@ " movl %%eax,%0\n" "7: rep; movsb\n" "8:\n" + " pushl %%ss\n" + " popl %%ds\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" "16: pushl %0\n" @@ -421,7 +550,7 @@ " .long 7b,16b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -437,6 +566,7 @@ int d0, d1; __asm__ __volatile__( + " movw %w6, %%ds\n" " .align 2,0x90\n" "0: movl 32(%4), %%eax\n" " cmpl $67, %0\n" @@ -445,36 +575,36 @@ " .align 2,0x90\n" "2: movl 0(%4), %%eax\n" "21: movl 4(%4), %%edx\n" - " movnti %%eax, 0(%3)\n" - " movnti %%edx, 4(%3)\n" + " movnti %%eax, %%es:0(%3)\n" + " movnti %%edx, %%es:4(%3)\n" "3: movl 8(%4), %%eax\n" "31: movl 12(%4),%%edx\n" - " movnti %%eax, 8(%3)\n" - " movnti %%edx, 12(%3)\n" + " movnti %%eax, %%es:8(%3)\n" + " movnti %%edx, %%es:12(%3)\n" "4: movl 16(%4), %%eax\n" "41: movl 20(%4), %%edx\n" - " movnti %%eax, 16(%3)\n" - " movnti %%edx, 20(%3)\n" + " movnti %%eax, %%es:16(%3)\n" + " movnti %%edx, %%es:20(%3)\n" "10: movl 24(%4), %%eax\n" "51: movl 28(%4), %%edx\n" - " movnti %%eax, 24(%3)\n" - " movnti %%edx, 28(%3)\n" + " movnti %%eax, %%es:24(%3)\n" + " movnti %%edx, %%es:28(%3)\n" "11: movl 32(%4), %%eax\n" "61: movl 36(%4), %%edx\n" - " movnti %%eax, 32(%3)\n" - " movnti %%edx, 36(%3)\n" + " movnti %%eax, %%es:32(%3)\n" + " movnti %%edx, %%es:36(%3)\n" "12: movl 40(%4), %%eax\n" "71: movl 44(%4), %%edx\n" - " movnti %%eax, 40(%3)\n" - " movnti %%edx, 44(%3)\n" + " movnti %%eax, %%es:40(%3)\n" + " movnti %%edx, %%es:44(%3)\n" "13: movl 48(%4), %%eax\n" "81: movl 52(%4), %%edx\n" - " movnti %%eax, 48(%3)\n" - " movnti %%edx, 52(%3)\n" + " movnti %%eax, %%es:48(%3)\n" + " movnti %%edx, %%es:52(%3)\n" "14: movl 56(%4), %%eax\n" "91: movl 60(%4), %%edx\n" - " movnti %%eax, 56(%3)\n" - " movnti %%edx, 60(%3)\n" + " movnti %%eax, %%es:56(%3)\n" + " movnti %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -489,6 +619,8 @@ " movl %%eax,%0\n" "7: rep; movsb\n" "8:\n" + " pushl %%ss\n" + " popl %%ds\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" "16: pushl %0\n" @@ -523,7 +655,7 @@ " .long 7b,16b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -534,6 +666,7 @@ int d0, d1; __asm__ __volatile__( + " movw %w6, %%ds\n" " .align 2,0x90\n" "0: movl 32(%4), %%eax\n" " cmpl $67, %0\n" @@ -542,36 +675,36 @@ " .align 2,0x90\n" "2: movl 0(%4), %%eax\n" "21: movl 4(%4), %%edx\n" - " movnti %%eax, 0(%3)\n" - " movnti %%edx, 4(%3)\n" + " movnti %%eax, %%es:0(%3)\n" + " movnti %%edx, %%es:4(%3)\n" "3: movl 8(%4), %%eax\n" "31: movl 12(%4),%%edx\n" - " movnti %%eax, 8(%3)\n" - " movnti %%edx, 12(%3)\n" + " movnti %%eax, %%es:8(%3)\n" + " movnti %%edx, %%es:12(%3)\n" "4: movl 16(%4), %%eax\n" "41: movl 20(%4), %%edx\n" - " movnti %%eax, 16(%3)\n" - " movnti %%edx, 20(%3)\n" + " movnti %%eax, %%es:16(%3)\n" + " movnti %%edx, %%es:20(%3)\n" "10: movl 24(%4), %%eax\n" "51: movl 28(%4), %%edx\n" - " movnti %%eax, 24(%3)\n" - " movnti %%edx, 28(%3)\n" + " movnti %%eax, %%es:24(%3)\n" + " movnti %%edx, %%es:28(%3)\n" "11: movl 32(%4), %%eax\n" "61: movl 36(%4), %%edx\n" - " movnti %%eax, 32(%3)\n" - " movnti %%edx, 36(%3)\n" + " movnti %%eax, %%es:32(%3)\n" + " movnti %%edx, %%es:36(%3)\n" "12: movl 40(%4), %%eax\n" "71: movl 44(%4), %%edx\n" - " movnti %%eax, 40(%3)\n" - " movnti %%edx, 44(%3)\n" + " movnti %%eax, %%es:40(%3)\n" + " movnti %%edx, %%es:44(%3)\n" "13: movl 48(%4), %%eax\n" "81: movl 52(%4), %%edx\n" - " movnti %%eax, 48(%3)\n" - " movnti %%edx, 52(%3)\n" + " movnti %%eax, %%es:48(%3)\n" + " movnti %%edx, %%es:52(%3)\n" "14: movl 56(%4), %%eax\n" "91: movl 60(%4), %%edx\n" - " movnti %%eax, 56(%3)\n" - " movnti %%edx, 60(%3)\n" + " movnti %%eax, %%es:56(%3)\n" + " movnti %%edx, %%es:60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -586,6 +719,8 @@ " movl %%eax,%0\n" "7: rep; movsb\n" "8:\n" + " pushl %%ss\n" + " popl %%ds\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" "16: jmp 8b\n" @@ -614,7 +749,7 @@ " .long 7b,16b\n" ".previous" : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) + : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) : "eax", "edx", "memory"); return size; } @@ -627,90 +762,146 @@ */ unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size); -unsigned long __copy_user_intel(void __user *to, const void *from, +unsigned long __generic_copy_to_user_intel(void __user *to, const void *from, + unsigned long size); +unsigned long __generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size); unsigned long __copy_user_zeroing_intel_nocache(void *to, const void __user *from, unsigned long size); #endif /* CONFIG_X86_INTEL_USERCOPY */ /* Generic arbitrary sized copy. */ -#define __copy_user(to,from,size) \ -do { \ - int __d0, __d1, __d2; \ - __asm__ __volatile__( \ - " cmp $7,%0\n" \ - " jbe 1f\n" \ - " movl %1,%0\n" \ - " negl %0\n" \ - " andl $7,%0\n" \ - " subl %0,%3\n" \ - "4: rep; movsb\n" \ - " movl %3,%0\n" \ - " shrl $2,%0\n" \ - " andl $3,%3\n" \ - " .align 2,0x90\n" \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "5: addl %3,%0\n" \ - " jmp 2b\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 4b,5b\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ - : "3"(size), "0"(size), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -#define __copy_user_zeroing(to,from,size) \ -do { \ - int __d0, __d1, __d2; \ - __asm__ __volatile__( \ - " cmp $7,%0\n" \ - " jbe 1f\n" \ - " movl %1,%0\n" \ - " negl %0\n" \ - " andl $7,%0\n" \ - " subl %0,%3\n" \ - "4: rep; movsb\n" \ - " movl %3,%0\n" \ - " shrl $2,%0\n" \ - " andl $3,%3\n" \ - " .align 2,0x90\n" \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "5: addl %3,%0\n" \ - " jmp 6f\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - "6: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 4b,5b\n" \ - " .long 0b,3b\n" \ - " .long 1b,6b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ - : "3"(size), "0"(size), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) +static unsigned long +__generic_copy_to_user(void __user *to, const void *from, unsigned long size) +{ + int __d0, __d1, __d2; + + __asm__ __volatile__( + " movw %w8,%%es\n" + " cmp $7,%0\n" + " jbe 1f\n" + " movl %1,%0\n" + " negl %0\n" + " andl $7,%0\n" + " subl %0,%3\n" + "4: rep; movsb\n" + " movl %3,%0\n" + " shrl $2,%0\n" + " andl $3,%3\n" + " .align 2,0x90\n" + "0: rep; movsl\n" + " movl %3,%0\n" + "1: rep; movsb\n" + "2:\n" + " pushl %%ss\n" + " popl %%es\n" + ".section .fixup,\"ax\"\n" + "5: addl %3,%0\n" + " jmp 2b\n" + "3: lea 0(%3,%0,4),%0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 4b,5b\n" + " .long 0b,3b\n" + " .long 1b,2b\n" + ".previous" + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) + : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) + : "memory"); + return size; +} + +static unsigned long +__generic_copy_from_user(void *to, const void __user *from, unsigned long size) +{ + int __d0, __d1, __d2; + + __asm__ __volatile__( + " movw %w8,%%ds\n" + " cmp $7,%0\n" + " jbe 1f\n" + " movl %1,%0\n" + " negl %0\n" + " andl $7,%0\n" + " subl %0,%3\n" + "4: rep; movsb\n" + " movl %3,%0\n" + " shrl $2,%0\n" + " andl $3,%3\n" + " .align 2,0x90\n" + "0: rep; movsl\n" + " movl %3,%0\n" + "1: rep; movsb\n" + "2:\n" + " pushl %%ss\n" + " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "5: addl %3,%0\n" + " jmp 2b\n" + "3: lea 0(%3,%0,4),%0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 4b,5b\n" + " .long 0b,3b\n" + " .long 1b,2b\n" + ".previous" + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) + : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) + : "memory"); + return size; +} + +static unsigned long +__copy_user_zeroing(void *to, const void __user *from, unsigned long size) +{ + int __d0, __d1, __d2; + + __asm__ __volatile__( + " movw %w8,%%ds\n" + " cmp $7,%0\n" + " jbe 1f\n" + " movl %1,%0\n" + " negl %0\n" + " andl $7,%0\n" + " subl %0,%3\n" + "4: rep; movsb\n" + " movl %3,%0\n" + " shrl $2,%0\n" + " andl $3,%3\n" + " .align 2,0x90\n" + "0: rep; movsl\n" + " movl %3,%0\n" + "1: rep; movsb\n" + "2:\n" + " pushl %%ss\n" + " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "5: addl %3,%0\n" + " jmp 6f\n" + "3: lea 0(%3,%0,4),%0\n" + "6: pushl %0\n" + " pushl %%eax\n" + " xorl %%eax,%%eax\n" + " rep; stosb\n" + " popl %%eax\n" + " popl %0\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 4b,5b\n" + " .long 0b,3b\n" + " .long 1b,6b\n" + ".previous" + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) + : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) + : "memory"); + return size; +} unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) @@ -766,9 +957,9 @@ } #endif if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); + n = __generic_copy_to_user(to, from, n); else - n = __copy_user_intel(to, from, n); + n = __generic_copy_to_user_intel(to, from, n); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -778,7 +969,7 @@ { BUG_ON((long)n < 0); if (movsl_is_ok(to, from, n)) - __copy_user_zeroing(to, from, n); + n = __copy_user_zeroing(to, from, n); else n = __copy_user_zeroing_intel(to, from, n); return n; @@ -790,10 +981,9 @@ { BUG_ON((long)n < 0); if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); + n = __generic_copy_from_user(to, from, n); else - n = __copy_user_intel((void __user *)to, - (const void *)from, n); + n = __generic_copy_from_user_intel(to, from, n); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nozero); @@ -804,11 +994,11 @@ BUG_ON((long)n < 0); #ifdef CONFIG_X86_INTEL_USERCOPY if ( n > 64 && cpu_has_xmm2) - n = __copy_user_zeroing_intel_nocache(to, from, n); + n = __copy_user_zeroing_intel_nocache(to, from, n); else - __copy_user_zeroing(to, from, n); + n = __copy_user_zeroing(to, from, n); #else - __copy_user_zeroing(to, from, n); + n = __copy_user_zeroing(to, from, n); #endif return n; } @@ -819,11 +1009,11 @@ BUG_ON((long)n < 0); #ifdef CONFIG_X86_INTEL_USERCOPY if ( n > 64 && cpu_has_xmm2) - n = __copy_user_intel_nocache(to, from, n); + n = __copy_user_intel_nocache(to, from, n); else - __copy_user(to, from, n); + n = __generic_copy_from_user(to, from, n); #else - __copy_user(to, from, n); + n = __generic_copy_from_user(to, from, n); #endif return n; } @@ -878,3 +1068,44 @@ return n; } EXPORT_SYMBOL(copy_from_user); + +#ifdef CONFIG_PAX_MEMORY_UDEREF +void __set_fs(mm_segment_t x, int cpu) +{ + unsigned long limit = x.seg; + __u32 a, b; + + current_thread_info()->addr_limit = x; + if (likely(limit)) + limit -= 1UL; + + pack_descriptor(&a, &b, 0UL, limit, 0xF3, 0xC); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_DEFAULT_USER_DS, a, b); +} + +void set_fs(mm_segment_t x) +{ + int cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + + __set_fs(x, cpu); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + + put_cpu_no_resched(); +} +#else +void set_fs(mm_segment_t x) +{ + current_thread_info()->addr_limit = x; +} +#endif + +EXPORT_SYMBOL(set_fs); diff -Naur linux-2.6.19.2.orig/arch/i386/mach-voyager/voyager_smp.c linux-2.6.19.2/arch/i386/mach-voyager/voyager_smp.c --- linux-2.6.19.2.orig/arch/i386/mach-voyager/voyager_smp.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/mach-voyager/voyager_smp.c 2007-01-12 23:27:21.000000000 +0000 @@ -1305,7 +1305,7 @@ per_cpu(prof_counter, cpu); } - update_process_times(user_mode_vm(get_irq_regs())); + update_process_times(user_mode(get_irq_regs())); } if( ((1< */ - -/* - * We need to use the 2-level pagetable functions, but CONFIG_X86_PAE - * keeps that from happenning. If anyone has a better way, I'm listening. - * - * boot_pte_t is defined only if this all works correctly - */ - -#undef CONFIG_X86_PAE #include #include #include #include #include -/* - * I'm cheating here. It is known that the two boot PTE pages are - * allocated next to each other. I'm pretending that they're just - * one big array. - */ - -#define BOOT_PTE_PTRS (PTRS_PER_PTE*2) - -static unsigned long boot_pte_index(unsigned long vaddr) -{ - return __pa(vaddr) >> PAGE_SHIFT; -} - -static inline boot_pte_t* boot_vaddr_to_pte(void *address) -{ - boot_pte_t* boot_pg = (boot_pte_t*)pg0; - return &boot_pg[boot_pte_index((unsigned long)address)]; -} - /* * This is only for a caller who is clever enough to page-align * phys_addr and virtual_source, and who also has a preference * about which virtual address from which to steal ptes */ -static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages, - void* virtual_source) +static void __init __boot_ioremap(unsigned long phys_addr, unsigned long nrpages, + char* virtual_source) { - boot_pte_t* pte; - int i; - char *vaddr = virtual_source; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t* pte; + unsigned int i; + unsigned long vaddr = (unsigned long)virtual_source; + + pgd = pgd_offset_k(vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset_kernel(pmd, vaddr); - pte = boot_vaddr_to_pte(virtual_source); for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) { set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL)); - __flush_tlb_one(&vaddr[i*PAGE_SIZE]); + __flush_tlb_one(&virtual_source[i*PAGE_SIZE]); } } diff -Naur linux-2.6.19.2.orig/arch/i386/mm/extable.c linux-2.6.19.2/arch/i386/mm/extable.c --- linux-2.6.19.2.orig/arch/i386/mm/extable.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/mm/extable.c 2007-01-12 23:27:21.000000000 +0000 @@ -11,7 +11,7 @@ const struct exception_table_entry *fixup; #ifdef CONFIG_PNPBIOS - if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs))) + if (unlikely(!(regs->eflags & VM_MASK) && SEGMENT_IS_PNP_CODE(regs->xcs))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; diff -Naur linux-2.6.19.2.orig/arch/i386/mm/fault.c linux-2.6.19.2/arch/i386/mm/fault.c --- linux-2.6.19.2.orig/arch/i386/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/mm/fault.c 2007-01-12 23:27:21.000000000 +0000 @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include #include #include @@ -104,7 +107,8 @@ { unsigned long eip = regs->eip; unsigned seg = regs->xcs & 0xffff; - u32 seg_ar, seg_limit, base, *desc; + u32 seg_ar, seg_limit, base; + struct desc_struct *desc; /* Unlikely, but must come before segment checks. */ if (unlikely(regs->eflags & VM_MASK)) { @@ -118,7 +122,7 @@ /* By far the most common cases. */ if (likely(SEGMENT_IS_FLAT_CODE(seg))) - return eip; + return eip + (seg == __KERNEL_CS ? __KERNEL_TEXT_OFFSET : 0); /* Check the segment exists, is within the current LDT/GDT size, that kernel/user (ring 0..3) has the appropriate privilege, @@ -136,16 +140,14 @@ if (seg & (1<<2)) { /* Must lock the LDT while reading it. */ down(¤t->mm->context.sem); - desc = current->mm->context.ldt; - desc = (void *)desc + (seg & ~7); + desc = ¤t->mm->context.ldt[seg >> 3]; } else { /* Must disable preemption while reading the GDT. */ - desc = (u32 *)get_cpu_gdt_table(get_cpu()); - desc = (void *)desc + (seg & ~7); + desc = &get_cpu_gdt_table(get_cpu())[seg >> 3]; } /* Decode the code segment base from the descriptor */ - base = get_desc_base((unsigned long *)desc); + base = get_desc_base(desc); if (seg & (1<<2)) { up(¤t->mm->context.sem); @@ -246,6 +248,30 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long); +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +static int pax_handle_fetch_fault(struct pt_regs *regs); +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +static inline pmd_t * pax_get_pmd(struct mm_struct *mm, unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + return NULL; + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return NULL; + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return NULL; + return pmd; +} +#endif + static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) { unsigned index = pgd_index(address); @@ -326,14 +352,20 @@ struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; - unsigned long address; - unsigned long page; int write, si_code; +#ifdef CONFIG_PAX_PAGEEXEC + pmd_t *pmd; + pte_t *pte; + spinlock_t *ptl; + unsigned char pte_mask; +#endif + /* get the address */ - address = read_cr2(); + const unsigned long address = read_cr2(); tsk = current; + mm = tsk->mm; si_code = SEGV_MAPERR; @@ -372,14 +404,12 @@ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) local_irq_enable(); - mm = tsk->mm; - /* * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault.. */ if (in_atomic() || !mm) - goto bad_area_nosemaphore; + goto bad_area_nopax; /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -399,10 +429,101 @@ if (!down_read_trylock(&mm->mmap_sem)) { if ((error_code & 4) == 0 && !search_exception_tables(regs->eip)) - goto bad_area_nosemaphore; + goto bad_area_nopax; down_read(&mm->mmap_sem); } +#ifdef CONFIG_PAX_PAGEEXEC + if (unlikely((error_code & 5) != 5 || + (regs->eflags & X86_EFLAGS_VM) || + !(mm->pax_flags & MF_PAX_PAGEEXEC))) + goto not_pax_fault; + + /* PaX: it's our fault, let's handle it if we can */ + + /* PaX: take a look at read faults before acquiring any locks */ + if (unlikely(!(error_code & 2) && (regs->eip == address))) { + /* instruction fetch attempt from a protected page in user mode */ + up_read(&mm->mmap_sem); + +#ifdef CONFIG_PAX_EMUTRAMP + switch (pax_handle_fetch_fault(regs)) { + case 2: + return; + } +#endif + + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } + + pmd = pax_get_pmd(mm, address); + if (unlikely(!pmd)) + goto not_pax_fault; + + pte = pte_offset_map_lock(mm, pmd, address, &ptl); + if (unlikely(!(pte_val(*pte) & _PAGE_PRESENT) || pte_user(*pte))) { + pte_unmap_unlock(pte, ptl); + goto not_pax_fault; + } + + if (unlikely((error_code & 2) && !pte_write(*pte))) { + /* write attempt to a protected page in user mode */ + pte_unmap_unlock(pte, ptl); + goto not_pax_fault; + } + +#ifdef CONFIG_SMP + if (likely(address > get_limit(regs->xcs) && cpu_isset(smp_processor_id(), mm->context.cpu_user_cs_mask))) +#else + if (likely(address > get_limit(regs->xcs))) +#endif + { + set_pte(pte, pte_mkread(*pte)); + __flush_tlb_one(address); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); + return; + } + + pte_mask = _PAGE_ACCESSED | _PAGE_USER | ((error_code & 2) << (_PAGE_BIT_DIRTY-1)); + + /* + * PaX: fill DTLB with user rights and retry + */ + __asm__ __volatile__ ( + "movw %w4,%%ds\n" + "orb %2,%%ss:(%1)\n" +#if defined(CONFIG_M586) || defined(CONFIG_M586TSC) +/* + * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's + * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any* + * page fault when examined during a TLB load attempt. this is true not only + * for PTEs holding a non-present entry but also present entries that will + * raise a page fault (such as those set up by PaX, or the copy-on-write + * mechanism). in effect it means that we do *not* need to flush the TLBs + * for our target pages since their PTEs are simply not in the TLBs at all. + + * the best thing in omitting it is that we gain around 15-20% speed in the + * fast path of the page fault handler and can get rid of tracing since we + * can no longer flush unintended entries. + */ + "invlpg (%0)\n" +#endif + "testb $0,(%0)\n" + "xorb %3,%%ss:(%1)\n" + "pushl %%ss\n" + "popl %%ds\n" + : + : "q" (address), "r" (pte), "q" (pte_mask), "i" (_PAGE_USER), "r" (__USER_DS) + : "memory", "cc"); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); + return; + +not_pax_fault: +#endif + vma = find_vma(mm, address); if (!vma) goto bad_area; @@ -484,6 +605,36 @@ up_read(&mm->mmap_sem); bad_area_nosemaphore: + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (mm && (error_code & 4) && !(regs->eflags & X86_EFLAGS_VM)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && !(error_code & 3) && (regs->eip == address)) { + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && !(error_code & 3) && (regs->eip + SEGMEXEC_TASK_SIZE == address)) { + +#ifdef CONFIG_PAX_EMUTRAMP + switch (pax_handle_fetch_fault(regs)) { + case 2: + return; + } +#endif + + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + + } +#endif + +bad_area_nopax: /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { /* @@ -551,6 +702,22 @@ if (address < PAGE_SIZE) printk(KERN_ALERT "BUG: unable to handle kernel NULL " "pointer dereference"); + +#ifdef CONFIG_PAX_KERNEXEC +#ifdef CONFIG_MODULES + else if (init_mm.start_code <= address && address < (unsigned long)MODULES_END) { +#else + else if (init_mm.start_code <= address && address < init_mm.end_code) { +#endif + if (tsk->signal->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + NIPQUAD(tsk->signal->curr_ip), tsk->comm, tsk->pid, tsk->uid, tsk->euid); + else + printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + tsk->comm, tsk->pid, tsk->uid, tsk->euid); + } +#endif + else printk(KERN_ALERT "BUG: unable to handle kernel paging" " request"); @@ -558,24 +725,34 @@ printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); } - page = read_cr3(); - page = ((unsigned long *) __va(page))[address >> 22]; - if (oops_may_print()) - printk(KERN_ALERT "*pde = %08lx\n", page); - /* - * We must not directly access the pte in the highpte - * case, the page table might be allocated in highmem. - * And lets rather not kmap-atomic the pte, just in case - * it's allocated already. - */ + + if (oops_may_print()) { + unsigned long index = pgd_index(address); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = index + (pgd_t *)__va(read_cr3()); + printk(KERN_ALERT "*pgd = %*llx\n", sizeof(*pgd), (unsigned long long)pgd_val(*pgd)); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + printk(KERN_ALERT "*pmd = %*llx\n", sizeof(*pmd), (unsigned long long)pmd_val(*pmd)); + /* + * We must not directly access the pte in the highpte + * case, the page table might be allocated in highmem. + * And lets rather not kmap-atomic the pte, just in case + * it's allocated already. + */ #ifndef CONFIG_HIGHPTE - if ((page & 1) && oops_may_print()) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; - printk(KERN_ALERT "*pte = %08lx\n", page); - } + if (pmd_present(*pmd) && !pmd_large(*pmd)) { + pte = pte_offset_kernel(pmd, address); + printk(KERN_ALERT "*pte = %*llx\n", sizeof(*pte), (unsigned long long)pte_val(*pte)); + } #endif + } + } tsk->thread.cr2 = address; tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; @@ -652,3 +829,101 @@ } } #endif + +#ifdef CONFIG_PAX_EMUTRAMP +/* + * PaX: decide what to do with offenders (regs->eip = fault address) + * + * returns 1 when task should be killed + * 2 when gcc trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + + static const unsigned char trans[8] = {6, 1, 2, 0, 13, 5, 3, 4}; + int err; + + if (regs->eflags & X86_EFLAGS_VM) + return 1; + + if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) + return 1; + + do { /* PaX: gcc trampoline emulation #1 */ + unsigned char mov1, mov2; + unsigned short jmp; + unsigned long addr1, addr2; + + err = get_user(mov1, (unsigned char __user *)regs->eip); + err |= get_user(addr1, (unsigned long __user *)(regs->eip + 1)); + err |= get_user(mov2, (unsigned char __user *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long __user *)(regs->eip + 6)); + err |= get_user(jmp, (unsigned short __user *)(regs->eip + 10)); + + if (err) + break; + + if ((mov1 & 0xF8) == 0xB8 && + (mov2 & 0xF8) == 0xB8 && + (mov1 & 0x07) != (mov2 & 0x07) && + (jmp & 0xF8FF) == 0xE0FF && + (mov2 & 0x07) == ((jmp>>8) & 0x07)) + { + ((unsigned long *)regs)[trans[mov1 & 0x07]] = addr1; + ((unsigned long *)regs)[trans[mov2 & 0x07]] = addr2; + regs->eip = addr2; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #2 */ + unsigned char mov, jmp; + unsigned long addr1, addr2; + + err = get_user(mov, (unsigned char __user *)regs->eip); + err |= get_user(addr1, (unsigned long __user *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char __user *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long __user *)(regs->eip + 6)); + + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9) + { + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 2; + } + } while (0); + + return 1; /* PaX in action */ +} +#endif + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +void pax_report_insns(void *pc, void *sp) +{ + long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char __user *)pc+i)) + printk("?? "); + else + printk("%02x ", c); + } + printk("\n"); + + printk(KERN_ERR "PAX: bytes at SP-4: "); + for (i = -1; i < 20; i++) { + unsigned long c; + if (get_user(c, (unsigned long __user *)sp+i)) + printk("???????? "); + else + printk("%08lx ", c); + } + printk("\n"); +} +#endif diff -Naur linux-2.6.19.2.orig/arch/i386/mm/hugetlbpage.c linux-2.6.19.2/arch/i386/mm/hugetlbpage.c --- linux-2.6.19.2.orig/arch/i386/mm/hugetlbpage.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/mm/hugetlbpage.c 2007-01-12 23:27:21.000000000 +0000 @@ -120,7 +120,12 @@ { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; + unsigned long start_addr, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif if (len > mm->cached_hole_size) { start_addr = mm->free_area_cache; @@ -134,7 +139,7 @@ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { + if (task_size - len < addr) { /* * Start a new search - just in case we missed * some holes. @@ -162,9 +167,8 @@ { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev_vma; - unsigned long base = mm->mmap_base, addr = addr0; + unsigned long base = mm->mmap_base, addr; unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; /* don't allow allocations above current base */ if (mm->free_area_cache > base) @@ -174,7 +178,7 @@ largest_hole = 0; mm->free_area_cache = base; } -try_again: + /* make sure it can fit in the remaining address space */ if (mm->free_area_cache < len) goto fail; @@ -216,16 +220,6 @@ fail: /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() @@ -251,16 +245,23 @@ { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long task_size = TASK_SIZE; if (len & ~HPAGE_MASK) return -EINVAL; - if (len > TASK_SIZE) + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (len > task_size || addr > task_size - len) return -ENOMEM; if (addr) { addr = ALIGN(addr, HPAGE_SIZE); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && + if (task_size - len >= addr && (!vma || addr + len <= vma->vm_start)) return addr; } diff -Naur linux-2.6.19.2.orig/arch/i386/mm/init.c linux-2.6.19.2/arch/i386/mm/init.c --- linux-2.6.19.2.orig/arch/i386/mm/init.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/mm/init.c 2007-01-12 23:27:21.000000000 +0000 @@ -42,6 +42,7 @@ #include #include #include +#include unsigned int __VMALLOC_RESERVE = 128 << 20; @@ -51,30 +52,6 @@ static int noinline do_test_wp_bit(void); /* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. - */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pud_t *pud; - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - pud = pud_offset(pgd, 0); - if (pmd_table != pmd_offset(pud, 0)) - BUG(); -#else - pud = pud_offset(pgd, 0); - pmd_table = pmd_offset(pud, 0); -#endif - - return pmd_table; -} - -/* * Create a page table and place a pointer to it in a middle page * directory entry. */ @@ -82,7 +59,11 @@ { if (pmd_none(*pmd)) { pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE)); +#else set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); +#endif if (page_table != pte_offset_kernel(pmd, 0)) BUG(); @@ -117,8 +98,6 @@ pgd = pgd_base + pgd_idx; for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - if (pgd_none(*pgd)) - one_md_table_init(pgd); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { @@ -131,11 +110,22 @@ } } -static inline int is_kernel_text(unsigned long addr) +static inline int is_kernel_text(unsigned long start, unsigned long end) { - if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) - return 1; - return 0; + unsigned long etext; + +#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) + etext = (unsigned long)&MODULES_END - __KERNEL_TEXT_OFFSET; +#else + etext = (unsigned long)&_etext; +#endif + + if ((start > etext + __KERNEL_TEXT_OFFSET || + end <= (unsigned long)_stext + __KERNEL_TEXT_OFFSET) && + (start > (unsigned long)_einittext + __KERNEL_TEXT_OFFSET || + end <= (unsigned long)_sinittext + __KERNEL_TEXT_OFFSET)) + return 0; + return 1; } /* @@ -147,26 +137,24 @@ { unsigned long pfn; pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; - int pgd_idx, pmd_idx, pte_ofs; + unsigned int pgd_idx, pmd_idx, pte_ofs; pgd_idx = pgd_index(PAGE_OFFSET); pgd = pgd_base + pgd_idx; pfn = 0; - for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - if (pfn >= max_low_pfn) - continue; + for (; pgd_idx < PTRS_PER_PGD && pfn < max_low_pfn; pgd++, pgd_idx++) { + pud = pud_offset(pgd, 0); + pmd = pmd_offset(pud, 0); for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { - unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET; + unsigned long address = pfn * PAGE_SIZE + PAGE_OFFSET; /* Map with big pages if possible, otherwise create normal page tables. */ if (cpu_has_pse) { - unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; - - if (is_kernel_text(address) || is_kernel_text(address2)) + if (is_kernel_text(address, address + PMD_SIZE)) set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); else set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); @@ -175,7 +163,7 @@ pte = one_page_table_init(pmd); for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { - if (is_kernel_text(address)) + if (is_kernel_text(address, address + PAGE_SIZE)) set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); else set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); @@ -342,13 +330,6 @@ unsigned long vaddr; pgd_t *pgd_base = swapper_pg_dir; -#ifdef CONFIG_X86_PAE - int i; - /* Init entries of the first-level page table to the zero page */ - for (i = 0; i < PTRS_PER_PGD; i++) - set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); -#endif - /* Enable PSE if available */ if (cpu_has_pse) { set_in_cr4(X86_CR4_PSE); @@ -372,17 +353,6 @@ page_table_range_init(vaddr, 0, pgd_base); permanent_kmaps_init(pgd_base); - -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); -#endif } #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) @@ -424,7 +394,6 @@ flush_tlb_all(); } -static int disable_nx __initdata = 0; u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; /* @@ -438,13 +407,10 @@ static int __init noexec_setup(char *str) { if (!str || !strcmp(str, "on")) { - if (cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } + if (cpu_has_nx) + nx_enabled = 1; } else if (!strcmp(str,"off")) { - disable_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; + nx_enabled = 0; } else return -EINVAL; @@ -457,17 +423,13 @@ static void __init set_nx(void) { - unsigned int v[4], l, h; + if (!nx_enabled && cpu_has_nx) { + unsigned l, h; - if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { - cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); - if ((v[3] & (1 << 20)) && !disable_nx) { - rdmsr(MSR_EFER, l, h); - l |= EFER_NX; - wrmsr(MSR_EFER, l, h); - nx_enabled = 1; - __supported_pte_mask |= _PAGE_NX; - } + __supported_pte_mask &= ~_PAGE_NX; + rdmsr(MSR_EFER, l, h); + l &= ~EFER_NX; + wrmsr(MSR_EFER, l, h); } } @@ -520,14 +482,6 @@ load_cr3(swapper_pg_dir); -#ifdef CONFIG_X86_PAE - /* - * We will bail out later - printk doesn't work right now so - * the user would just see a hanging kernel. - */ - if (cpu_has_pae) - set_in_cr4(X86_CR4_PAE); -#endif __flush_tlb_all(); kmap_init(); @@ -598,7 +552,7 @@ set_highmem_pages_init(bad_ppro); codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; + datasize = (unsigned long) &_edata - (unsigned long) &_data; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); @@ -643,10 +597,10 @@ (unsigned long)&__init_begin, (unsigned long)&__init_end, ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, - (unsigned long)&_etext, (unsigned long)&_edata, - ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, + (unsigned long)&_data, (unsigned long)&_edata, + ((unsigned long)&_edata - (unsigned long)&_data) >> 10, - (unsigned long)&_text, (unsigned long)&_etext, + (unsigned long)&_text + __KERNEL_TEXT_OFFSET, (unsigned long)&_etext + __KERNEL_TEXT_OFFSET, ((unsigned long)&_etext - (unsigned long)&_text) >> 10); #ifdef CONFIG_HIGHMEM @@ -657,10 +611,6 @@ BUG_ON((unsigned long)high_memory > VMALLOC_START); #endif /* double-sanity-check paranoia */ -#ifdef CONFIG_X86_PAE - if (!cpu_has_pae) - panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); -#endif if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); @@ -789,6 +739,38 @@ void free_initmem(void) { + +#ifdef CONFIG_PAX_KERNEXEC + /* PaX: limit KERNEL_CS to actual size */ + unsigned long addr, limit; + __u32 a, b; + int cpu; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + +#ifdef CONFIG_MODULES + limit = (unsigned long)&MODULES_END - __KERNEL_TEXT_OFFSET; +#else + limit = (unsigned long)&_etext; +#endif + limit = (limit - 1UL) >> PAGE_SHIFT; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + pack_descriptor(&a, &b, get_desc_base(&get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS]), limit, 0x9B, 0xC); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_KERNEL_CS, a, b); + } + + /* PaX: make KERNEL_CS read-only */ + for (addr = __KERNEL_TEXT_OFFSET; addr < (unsigned long)&_data; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + } + flush_tlb_all(); +#endif + free_init_pages("unused kernel memory", (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); diff -Naur linux-2.6.19.2.orig/arch/i386/mm/mmap.c linux-2.6.19.2/arch/i386/mm/mmap.c --- linux-2.6.19.2.orig/arch/i386/mm/mmap.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/mm/mmap.c 2007-01-12 23:27:21.000000000 +0000 @@ -34,12 +34,18 @@ * Leave an at least ~128 MB hole. */ #define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) +#define MAX_GAP (task_size/6*5) static inline unsigned long mmap_base(struct mm_struct *mm) { unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; unsigned long random_factor = 0; + unsigned long task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif if (current->flags & PF_RANDOMIZE) random_factor = get_random_int() % (1024*1024); @@ -49,7 +55,7 @@ else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - random_factor); + return PAGE_ALIGN(task_size - gap - random_factor); } /* @@ -66,10 +72,22 @@ (current->personality & ADDR_COMPAT_LAYOUT) || current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(mm); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff -Naur linux-2.6.19.2.orig/arch/i386/mm/pageattr.c linux-2.6.19.2/arch/i386/mm/pageattr.c --- linux-2.6.19.2.orig/arch/i386/mm/pageattr.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/mm/pageattr.c 2007-01-12 23:27:21.000000000 +0000 @@ -13,6 +13,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(cpa_lock); static struct list_head df_list = LIST_HEAD_INIT(df_list); @@ -83,7 +84,18 @@ struct page *page; unsigned long flags; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + set_pte_atomic(kpte, pte); /* change init_mm */ + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + if (PTRS_PER_PMD > 1) return; @@ -110,7 +122,7 @@ pte_t *linear; ref_prot = - ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext + __KERNEL_TEXT_OFFSET) ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE; linear = (pte_t *) @@ -142,7 +154,7 @@ struct page *split; ref_prot = - ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext + __KERNEL_TEXT_OFFSET) ? PAGE_KERNEL_EXEC : PAGE_KERNEL; split = split_large_page(address, prot, ref_prot); if (!split) diff -Naur linux-2.6.19.2.orig/arch/i386/oprofile/backtrace.c linux-2.6.19.2/arch/i386/oprofile/backtrace.c --- linux-2.6.19.2.orig/arch/i386/oprofile/backtrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/oprofile/backtrace.c 2007-01-12 23:27:21.000000000 +0000 @@ -116,7 +116,7 @@ head = (struct frame_head *)regs->ebp; #endif - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { while (depth-- && valid_kernel_stack(head, regs)) head = dump_kernel_backtrace(head); return; diff -Naur linux-2.6.19.2.orig/arch/i386/oprofile/op_model_p4.c linux-2.6.19.2/arch/i386/oprofile/op_model_p4.c --- linux-2.6.19.2.orig/arch/i386/oprofile/op_model_p4.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/oprofile/op_model_p4.c 2007-01-12 23:27:22.000000000 +0000 @@ -47,7 +47,7 @@ #endif } -static int inline addr_increment(void) +static inline int addr_increment(void) { #ifdef CONFIG_SMP return smp_num_siblings == 2 ? 2 : 1; diff -Naur linux-2.6.19.2.orig/arch/i386/pci/early.c linux-2.6.19.2/arch/i386/pci/early.c --- linux-2.6.19.2.orig/arch/i386/pci/early.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/pci/early.c 2007-01-12 23:27:22.000000000 +0000 @@ -7,7 +7,7 @@ /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. */ -#define PDprintk(x...) +#define PDprintk(x...) do {} while (0) u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) { diff -Naur linux-2.6.19.2.orig/arch/i386/power/cpu.c linux-2.6.19.2/arch/i386/power/cpu.c --- linux-2.6.19.2.orig/arch/i386/power/cpu.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/i386/power/cpu.c 2007-01-12 23:27:22.000000000 +0000 @@ -63,7 +63,7 @@ static void fix_processor_context(void) { int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); + struct tss_struct * t = init_tss + cpu; set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ diff -Naur linux-2.6.19.2.orig/arch/ia64/ia32/binfmt_elf32.c linux-2.6.19.2/arch/ia64/ia32/binfmt_elf32.c --- linux-2.6.19.2.orig/arch/ia64/ia32/binfmt_elf32.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/ia64/ia32/binfmt_elf32.c 2007-01-12 23:27:22.000000000 +0000 @@ -45,6 +45,17 @@ #define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) ((tsk)->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - IA32_PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - IA32_PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - IA32_PAGE_SHIFT) +#endif + /* Ugly but avoids duplication */ #include "../../../fs/binfmt_elf.c" diff -Naur linux-2.6.19.2.orig/arch/ia64/ia32/ia32priv.h linux-2.6.19.2/arch/ia64/ia32/ia32priv.h --- linux-2.6.19.2.orig/arch/ia64/ia32/ia32priv.h 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/ia64/ia32/ia32priv.h 2007-01-12 23:27:22.000000000 +0000 @@ -304,7 +304,14 @@ #define ELF_DATA ELFDATA2LSB #define ELF_ARCH EM_386 -#define IA32_STACK_TOP IA32_PAGE_OFFSET +#ifdef CONFIG_PAX_RANDUSTACK +#define __IA32_DELTA_STACK (current->mm->delta_stack) +#else +#define __IA32_DELTA_STACK 0UL +#endif + +#define IA32_STACK_TOP (IA32_PAGE_OFFSET - __IA32_DELTA_STACK) + #define IA32_GATE_OFFSET IA32_PAGE_OFFSET #define IA32_GATE_END IA32_PAGE_OFFSET + PAGE_SIZE diff -Naur linux-2.6.19.2.orig/arch/ia64/kernel/module.c linux-2.6.19.2/arch/ia64/kernel/module.c --- linux-2.6.19.2.orig/arch/ia64/kernel/module.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/ia64/kernel/module.c 2007-01-12 23:27:22.000000000 +0000 @@ -321,7 +321,7 @@ void module_free (struct module *mod, void *module_region) { - if (mod->arch.init_unw_table && module_region == mod->module_init) { + if (mod->arch.init_unw_table && module_region == mod->module_init_rx) { unw_remove_unwind_table(mod->arch.init_unw_table); mod->arch.init_unw_table = NULL; } @@ -499,15 +499,39 @@ } static inline int +in_init_rx (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_init_rx < mod->init_size_rx; +} + +static inline int +in_init_rw (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_init_rw < mod->init_size_rw; +} + +static inline int in_init (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_init < mod->init_size; + return in_init_rx(mod, value) || in_init_rw(mod, value); +} + +static inline int +in_core_rx (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_core_rx < mod->core_size_rx; +} + +static inline int +in_core_rw (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_core_rw < mod->core_size_rw; } static inline int in_core (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_core < mod->core_size; + return in_core_rx(mod, value) || in_core_rw(mod, value); } static inline int @@ -691,7 +715,14 @@ break; case RV_BDREL: - val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); + if (in_init_rx(mod, val)) + val -= (uint64_t) mod->module_init_rx; + else if (in_init_rw(mod, val)) + val -= (uint64_t) mod->module_init_rw; + else if (in_core_rx(mod, val)) + val -= (uint64_t) mod->module_core_rx; + else if (in_core_rw(mod, val)) + val -= (uint64_t) mod->module_core_rw; break; case RV_LTV: @@ -825,15 +856,15 @@ * addresses have been selected... */ uint64_t gp; - if (mod->core_size > MAX_LTOFF) + if (mod->core_size_rx + mod->core_size_rw > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module. */ - gp = mod->core_size - MAX_LTOFF / 2; + gp = mod->core_size_rx + mod->core_size_rw - MAX_LTOFF / 2; else - gp = mod->core_size / 2; - gp = (uint64_t) mod->module_core + ((gp + 7) & -8); + gp = (mod->core_size_rx + mod->core_size_rw) / 2; + gp = (uint64_t) mod->module_core_rx + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp); } diff -Naur linux-2.6.19.2.orig/arch/ia64/kernel/ptrace.c linux-2.6.19.2/arch/ia64/kernel/ptrace.c --- linux-2.6.19.2.orig/arch/ia64/kernel/ptrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/ia64/kernel/ptrace.c 2007-01-12 23:27:22.000000000 +0000 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -1446,6 +1447,9 @@ if (pid == 1) /* no messing around with init! */ goto out_tsk; + if (gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; diff -Naur linux-2.6.19.2.orig/arch/ia64/kernel/sys_ia64.c linux-2.6.19.2/arch/ia64/kernel/sys_ia64.c --- linux-2.6.19.2.orig/arch/ia64/kernel/sys_ia64.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/ia64/kernel/sys_ia64.c 2007-01-12 23:27:22.000000000 +0000 @@ -37,6 +37,13 @@ if (REGION_NUMBER(addr) == RGN_HPAGE) addr = 0; #endif + +#ifdef CONFIG_PAX_RANDMMAP + if ((mm->pax_flags & MF_PAX_RANDMMAP) && addr && filp) + addr = mm->free_area_cache; + else +#endif + if (!addr) addr = mm->free_area_cache; @@ -55,9 +62,9 @@ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { + if (start_addr != mm->mmap_base) { /* Start a new search --- just in case we missed some holes. */ - addr = TASK_UNMAPPED_BASE; + addr = mm->mmap_base; goto full_search; } return -ENOMEM; diff -Naur linux-2.6.19.2.orig/arch/ia64/mm/fault.c linux-2.6.19.2/arch/ia64/mm/fault.c --- linux-2.6.19.2.orig/arch/ia64/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/ia64/mm/fault.c 2007-01-12 23:27:22.000000000 +0000 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -85,6 +86,23 @@ return pte_present(pte); } +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 8; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + void __kprobes ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) { @@ -152,9 +170,23 @@ mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); - if ((vma->vm_flags & mask) != mask) + if ((vma->vm_flags & mask) != mask) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(vma->vm_flags & VM_EXEC) && (mask & VM_EXEC)) { + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->cr_iip) + goto bad_area; + + up_read(&mm->mmap_sem); + pax_report_fault(regs, (void*)regs->cr_iip, (void*)regs->r12); + do_exit(SIGKILL); + } +#endif + goto bad_area; + } + survive: /* * If for any reason at all we couldn't handle the fault, make diff -Naur linux-2.6.19.2.orig/arch/ia64/mm/init.c linux-2.6.19.2/arch/ia64/mm/init.c --- linux-2.6.19.2.orig/arch/ia64/mm/init.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/ia64/mm/init.c 2007-01-12 23:27:22.000000000 +0000 @@ -19,8 +19,8 @@ #include #include #include +#include -#include #include #include #include diff -Naur linux-2.6.19.2.orig/arch/mips/kernel/binfmt_elfn32.c linux-2.6.19.2/arch/mips/kernel/binfmt_elfn32.c --- linux-2.6.19.2.orig/arch/mips/kernel/binfmt_elfn32.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/mips/kernel/binfmt_elfn32.c 2007-01-12 23:27:22.000000000 +0000 @@ -50,6 +50,17 @@ #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #include #include #include diff -Naur linux-2.6.19.2.orig/arch/mips/kernel/binfmt_elfo32.c linux-2.6.19.2/arch/mips/kernel/binfmt_elfo32.c --- linux-2.6.19.2.orig/arch/mips/kernel/binfmt_elfo32.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/mips/kernel/binfmt_elfo32.c 2007-01-12 23:27:22.000000000 +0000 @@ -52,6 +52,17 @@ #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #include #include #include diff -Naur linux-2.6.19.2.orig/arch/mips/kernel/syscall.c linux-2.6.19.2/arch/mips/kernel/syscall.c --- linux-2.6.19.2.orig/arch/mips/kernel/syscall.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/mips/kernel/syscall.c 2007-01-12 23:27:22.000000000 +0000 @@ -88,6 +88,11 @@ do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -98,7 +103,7 @@ (!vmm || addr + len <= vmm->vm_start)) return addr; } - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); else diff -Naur linux-2.6.19.2.orig/arch/mips/mm/fault.c linux-2.6.19.2/arch/mips/mm/fault.c --- linux-2.6.19.2.orig/arch/mips/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/mips/mm/fault.c 2007-01-12 23:27:22.000000000 +0000 @@ -27,6 +27,23 @@ #include #include /* For VMALLOC_END */ +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(void *pc) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate diff -Naur linux-2.6.19.2.orig/arch/parisc/kernel/module.c linux-2.6.19.2/arch/parisc/kernel/module.c --- linux-2.6.19.2.orig/arch/parisc/kernel/module.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/parisc/kernel/module.c 2007-01-12 23:27:22.000000000 +0000 @@ -72,16 +72,38 @@ /* three functions to determine where in the module core * or init pieces the location is */ +static inline int in_init_rx(struct module *me, void *loc) +{ + return (loc >= me->module_init_rx && + loc < (me->module_init_rx + me->init_size_rx)); +} + +static inline int in_init_rw(struct module *me, void *loc) +{ + return (loc >= me->module_init_rw && + loc < (me->module_init_rw + me->init_size_rw)); +} + static inline int in_init(struct module *me, void *loc) { - return (loc >= me->module_init && - loc <= (me->module_init + me->init_size)); + return in_init_rx(me, loc) || in_init_rw(me, loc); +} + +static inline int in_core_rx(struct module *me, void *loc) +{ + return (loc >= me->module_core_rx && + loc < (me->module_core_rx + me->core_size_rx)); +} + +static inline int in_core_rw(struct module *me, void *loc) +{ + return (loc >= me->module_core_rw && + loc < (me->module_core_rw + me->core_size_rw)); } static inline int in_core(struct module *me, void *loc) { - return (loc >= me->module_core && - loc <= (me->module_core + me->core_size)); + return in_core_rx(me, loc) || in_core_rw(me, loc); } static inline int in_local(struct module *me, void *loc) @@ -295,21 +317,21 @@ } /* align things a bit */ - me->core_size = ALIGN(me->core_size, 16); - me->arch.got_offset = me->core_size; - me->core_size += gots * sizeof(struct got_entry); - - me->core_size = ALIGN(me->core_size, 16); - me->arch.fdesc_offset = me->core_size; - me->core_size += fdescs * sizeof(Elf_Fdesc); - - me->core_size = ALIGN(me->core_size, 16); - me->arch.stub_offset = me->core_size; - me->core_size += stubs * sizeof(struct stub_entry); - - me->init_size = ALIGN(me->init_size, 16); - me->arch.init_stub_offset = me->init_size; - me->init_size += init_stubs * sizeof(struct stub_entry); + me->core_size_rw = ALIGN(me->core_size_rw, 16); + me->arch.got_offset = me->core_size_rw; + me->core_size_rw += gots * sizeof(struct got_entry); + + me->core_size_rw = ALIGN(me->core_size_rw, 16); + me->arch.fdesc_offset = me->core_size_rw; + me->core_size_rw += fdescs * sizeof(Elf_Fdesc); + + me->core_size_rx = ALIGN(me->core_size_rx, 16); + me->arch.stub_offset = me->core_size_rx; + me->core_size_rx += stubs * sizeof(struct stub_entry); + + me->init_size_rx = ALIGN(me->init_size_rx, 16); + me->arch.init_stub_offset = me->init_size_rx; + me->init_size_rx += init_stubs * sizeof(struct stub_entry); me->arch.got_max = gots; me->arch.fdesc_max = fdescs; @@ -329,7 +351,7 @@ BUG_ON(value == 0); - got = me->module_core + me->arch.got_offset; + got = me->module_core_rw + me->arch.got_offset; for (i = 0; got[i].addr; i++) if (got[i].addr == value) goto out; @@ -347,7 +369,7 @@ #ifdef __LP64__ static Elf_Addr get_fdesc(struct module *me, unsigned long value) { - Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset; + Elf_Fdesc *fdesc = me->module_core_rw + me->arch.fdesc_offset; if (!value) { printk(KERN_ERR "%s: zero OPD requested!\n", me->name); @@ -365,7 +387,7 @@ /* Create new one */ fdesc->addr = value; - fdesc->gp = (Elf_Addr)me->module_core + me->arch.got_offset; + fdesc->gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; return (Elf_Addr)fdesc; } #endif /* __LP64__ */ @@ -385,12 +407,12 @@ if(init_section) { i = me->arch.init_stub_count++; BUG_ON(me->arch.init_stub_count > me->arch.init_stub_max); - stub = me->module_init + me->arch.init_stub_offset + + stub = me->module_init_rx + me->arch.init_stub_offset + i * sizeof(struct stub_entry); } else { i = me->arch.stub_count++; BUG_ON(me->arch.stub_count > me->arch.stub_max); - stub = me->module_core + me->arch.stub_offset + + stub = me->module_core_rx + me->arch.stub_offset + i * sizeof(struct stub_entry); } @@ -758,7 +780,7 @@ table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr; end = table + sechdrs[me->arch.unwind_section].sh_size; - gp = (Elf_Addr)me->module_core + me->arch.got_offset; + gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", me->arch.unwind_section, table, end, gp); diff -Naur linux-2.6.19.2.orig/arch/parisc/kernel/ptrace.c linux-2.6.19.2/arch/parisc/kernel/ptrace.c --- linux-2.6.19.2.orig/arch/parisc/kernel/ptrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/parisc/kernel/ptrace.c 2007-01-12 23:27:22.000000000 +0000 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff -Naur linux-2.6.19.2.orig/arch/parisc/kernel/sys_parisc.c linux-2.6.19.2/arch/parisc/kernel/sys_parisc.c --- linux-2.6.19.2.orig/arch/parisc/kernel/sys_parisc.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/parisc/kernel/sys_parisc.c 2007-01-12 23:27:22.000000000 +0000 @@ -107,7 +107,7 @@ if (len > TASK_SIZE) return -ENOMEM; if (!addr) - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (filp) { addr = get_shared_area(filp->f_mapping, addr, len, pgoff); diff -Naur linux-2.6.19.2.orig/arch/parisc/kernel/traps.c linux-2.6.19.2/arch/parisc/kernel/traps.c --- linux-2.6.19.2.orig/arch/parisc/kernel/traps.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/parisc/kernel/traps.c 2007-01-12 23:27:22.000000000 +0000 @@ -716,9 +716,7 @@ down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm,regs->iaoq[0]); - if (vma && (regs->iaoq[0] >= vma->vm_start) - && (vma->vm_flags & VM_EXEC)) { - + if (vma && (regs->iaoq[0] >= vma->vm_start)) { fault_address = regs->iaoq[0]; fault_space = regs->iasq[0]; diff -Naur linux-2.6.19.2.orig/arch/parisc/mm/fault.c linux-2.6.19.2/arch/parisc/mm/fault.c --- linux-2.6.19.2.orig/arch/parisc/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/parisc/mm/fault.c 2007-01-12 23:27:22.000000000 +0000 @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -57,7 +59,7 @@ static unsigned long parisc_acctyp(unsigned long code, unsigned int inst) { - if (code == 6 || code == 16) + if (code == 6 || code == 7 || code == 16) return VM_EXEC; switch (inst & 0xf0000000) { @@ -143,6 +145,116 @@ } #endif +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address) + * + * returns 1 when task should be killed + * 2 when rt_sigreturn trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: unpatched PLT emulation */ + unsigned int bl, depwi; + + err = get_user(bl, (unsigned int*)instruction_pointer(regs)); + err |= get_user(depwi, (unsigned int*)(instruction_pointer(regs)+4)); + + if (err) + break; + + if (bl == 0xEA9F1FDDU && depwi == 0xD6801C1EU) { + unsigned int ldw, bv, ldw2, addr = instruction_pointer(regs)-12; + + err = get_user(ldw, (unsigned int*)addr); + err |= get_user(bv, (unsigned int*)(addr+4)); + err |= get_user(ldw2, (unsigned int*)(addr+8)); + + if (err) + break; + + if (ldw == 0x0E801096U && + bv == 0xEAC0C000U && + ldw2 == 0x0E881095U) + { + unsigned int resolver, map; + + err = get_user(resolver, (unsigned int*)(instruction_pointer(regs)+8)); + err |= get_user(map, (unsigned int*)(instruction_pointer(regs)+12)); + if (err) + break; + + regs->gr[20] = instruction_pointer(regs)+8; + regs->gr[21] = map; + regs->gr[22] = resolver; + regs->iaoq[0] = resolver | 3UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + return 3; + } + } + } while (0); +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + +#ifndef CONFIG_PAX_EMUSIGRT + if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) + return 1; +#endif + + do { /* PaX: rt_sigreturn emulation */ + unsigned int ldi1, ldi2, bel, nop; + + err = get_user(ldi1, (unsigned int *)instruction_pointer(regs)); + err |= get_user(ldi2, (unsigned int *)(instruction_pointer(regs)+4)); + err |= get_user(bel, (unsigned int *)(instruction_pointer(regs)+8)); + err |= get_user(nop, (unsigned int *)(instruction_pointer(regs)+12)); + + if (err) + break; + + if ((ldi1 == 0x34190000U || ldi1 == 0x34190002U) && + ldi2 == 0x3414015AU && + bel == 0xE4008200U && + nop == 0x08000240U) + { + regs->gr[25] = (ldi1 & 2) >> 1; + regs->gr[20] = __NR_rt_sigreturn; + regs->gr[31] = regs->iaoq[1] + 16; + regs->sr[0] = regs->iasq[1]; + regs->iaoq[0] = 0x100UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + regs->iasq[0] = regs->sr[2]; + regs->iasq[1] = regs->sr[2]; + return 2; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address) { @@ -168,8 +280,33 @@ acc_type = parisc_acctyp(code,regs->iir); - if ((vma->vm_flags & acc_type) != acc_type) + if ((vma->vm_flags & acc_type) != acc_type) { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) && + (address & ~3UL) == instruction_pointer(regs)) + { + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 3: + return; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + case 2: + return; +#endif + + } + pax_report_fault(regs, (void*)instruction_pointer(regs), (void*)regs->gr[30]); + do_exit(SIGKILL); + } +#endif + goto bad_area; + } /* * If for any reason at all we couldn't handle the fault, make diff -Naur linux-2.6.19.2.orig/arch/powerpc/kernel/module_32.c linux-2.6.19.2/arch/powerpc/kernel/module_32.c --- linux-2.6.19.2.orig/arch/powerpc/kernel/module_32.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/powerpc/kernel/module_32.c 2007-01-12 23:27:22.000000000 +0000 @@ -125,7 +125,7 @@ me->arch.core_plt_section = i; } if (!me->arch.core_plt_section || !me->arch.init_plt_section) { - printk("Module doesn't contain .plt or .init.plt sections.\n"); + printk("Module %s doesn't contain .plt or .init.plt sections.\n", me->name); return -ENOEXEC; } @@ -166,11 +166,16 @@ DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if ((location >= mod->module_core_rx && location < mod->module_core_rx + mod->core_size_rx) || + (location >= mod->module_core_rw && location < mod->module_core_rw + mod->core_size_rw)) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; - else + else if ((location >= mod->module_init_rx && location < mod->module_init_rx + mod->init_size_rx) || + (location >= mod->module_init_rw && location < mod->module_init_rw + mod->init_size_rw)) entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; + else { + printk(KERN_ERR "%s: invalid R_PPC_REL24 entry found\n", mod->name); + return ~0UL; + } /* Find this entry, or if that fails, the next avail. entry */ while (entry->jump[0]) { diff -Naur linux-2.6.19.2.orig/arch/powerpc/kernel/signal_32.c linux-2.6.19.2/arch/powerpc/kernel/signal_32.c --- linux-2.6.19.2.orig/arch/powerpc/kernel/signal_32.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/powerpc/kernel/signal_32.c 2007-01-12 23:27:22.000000000 +0000 @@ -759,7 +759,7 @@ /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { + if (vdso32_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { if (save_user_regs(regs, frame, 0)) goto badframe; regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp; diff -Naur linux-2.6.19.2.orig/arch/powerpc/kernel/signal_64.c linux-2.6.19.2/arch/powerpc/kernel/signal_64.c --- linux-2.6.19.2.orig/arch/powerpc/kernel/signal_64.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/powerpc/kernel/signal_64.c 2007-01-12 23:27:22.000000000 +0000 @@ -397,7 +397,7 @@ current->thread.fpscr.val = 0; /* Set up to return from userspace. */ - if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { + if (vdso64_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); diff -Naur linux-2.6.19.2.orig/arch/powerpc/kernel/vdso.c linux-2.6.19.2/arch/powerpc/kernel/vdso.c --- linux-2.6.19.2.orig/arch/powerpc/kernel/vdso.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/powerpc/kernel/vdso.c 2007-01-12 23:27:22.000000000 +0000 @@ -239,7 +239,7 @@ vdso_base = VDSO32_MBASE; #endif - current->mm->context.vdso_base = 0; + current->mm->context.vdso_base = ~0UL; /* vDSO has a problem and was disabled, just don't "enable" it for the * process @@ -256,7 +256,7 @@ */ down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, vdso_base, - vdso_pages << PAGE_SHIFT, 0, 0); + vdso_pages << PAGE_SHIFT, 0, MAP_PRIVATE | MAP_EXECUTABLE); if (IS_ERR_VALUE(vdso_base)) { rc = vdso_base; goto fail_mmapsem; @@ -284,8 +284,14 @@ * pages though */ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC; + +#ifdef CONFIG_PAX_MPROTECT + if (mm->pax_flags & MF_PAX_MPROTECT) + vma->vm_flags &= ~VM_MAYWRITE; +#endif + vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; + vma->vm_page_prot = protection_map[vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; vma->vm_ops = &vdso_vmops; /* Insert new VMA */ diff -Naur linux-2.6.19.2.orig/arch/powerpc/mm/fault.c linux-2.6.19.2/arch/powerpc/mm/fault.c --- linux-2.6.19.2.orig/arch/powerpc/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/powerpc/mm/fault.c 2007-01-12 23:27:22.000000000 +0000 @@ -28,6 +28,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include @@ -73,6 +79,364 @@ } #endif +#ifdef CONFIG_PAX_EMUSIGRT +void pax_syscall_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_syscall = 0UL; +} + +static struct page* pax_syscall_nopage(struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page* page; + unsigned int *kaddr; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return NOPAGE_OOM; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x44000002U; /* sc */ + __flush_dcache_icache(kaddr); + kunmap(page); + if (type) + *type = VM_FAULT_MAJOR; + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + .close = pax_syscall_close, + .nopage = pax_syscall_nopage, +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = protection_map[vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->nip = fault address) + * + * returns 1 when task should be killed + * 2 when patched GOT trampoline was detected + * 3 when patched PLT trampoline was detected + * 4 when unpatched PLT trampoline was detected + * 5 when sigreturn trampoline was detected + * 6 when rt_sigreturn trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#if defined(CONFIG_PAX_EMUPLT) || defined(CONFIG_PAX_EMUSIGRT) + int err; +#endif + +#ifdef CONFIG_PAX_EMUPLT + do { /* PaX: patched GOT emulation */ + unsigned int blrl; + + err = get_user(blrl, (unsigned int*)regs->nip); + + if (!err && blrl == 0x4E800021U) { + unsigned long temp = regs->nip; + + regs->nip = regs->link & 0xFFFFFFFCUL; + regs->link = temp + 4UL; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #1 */ + unsigned int b; + + err = get_user(b, (unsigned int *)regs->nip); + + if (!err && (b & 0xFC000003U) == 0x48000000U) { + regs->nip += (((b | 0xFC000000UL) ^ 0x02000000UL) + 0x02000000UL); + return 3; + } + } while (0); + + do { /* PaX: unpatched PLT emulation #1 */ + unsigned int li, b; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(b, (unsigned int *)(regs->nip+4)); + + if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { + unsigned int rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(rlwinm, (unsigned int*)addr); + err |= get_user(add, (unsigned int*)(addr+4)); + err |= get_user(li2, (unsigned int*)(addr+8)); + err |= get_user(addis2, (unsigned int*)(addr+12)); + err |= get_user(mtctr, (unsigned int*)(addr+16)); + err |= get_user(li3, (unsigned int*)(addr+20)); + err |= get_user(addis3, (unsigned int*)(addr+24)); + err |= get_user(bctr, (unsigned int*)(addr+28)); + + if (err) + break; + + if (rlwinm == 0x556C083CU && + add == 0x7D6C5A14U && + (li2 & 0xFFFF0000U) == 0x39800000U && + (addis2 & 0xFFFF0000U) == 0x3D8C0000U && + mtctr == 0x7D8903A6U && + (li3 & 0xFFFF0000U) == 0x39800000U && + (addis3 & 0xFFFF0000U) == 0x3D8C0000U && + bctr == 0x4E800420U) + { + regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; + regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->ctr += (addis2 & 0xFFFFU) << 16; + regs->nip = regs->ctr; + return 4; + } + } + } while (0); + +#if 0 + do { /* PaX: unpatched PLT emulation #2 */ + unsigned int lis, lwzu, b, bctr; + + err = get_user(lis, (unsigned int *)regs->nip); + err |= get_user(lwzu, (unsigned int *)(regs->nip+4)); + err |= get_user(b, (unsigned int *)(regs->nip+8)); + err |= get_user(bctr, (unsigned int *)(regs->nip+12)); + + if (err) + break; + + if ((lis & 0xFFFF0000U) == 0x39600000U && + (lwzu & 0xU) == 0xU && + (b & 0xFC000003U) == 0x48000000U && + bctr == 0x4E800420U) + { + unsigned int addis, addi, rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 12 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(addis, (unsigned int*)addr); + err |= get_user(addi, (unsigned int*)(addr+4)); + err |= get_user(rlwinm, (unsigned int*)(addr+8)); + err |= get_user(add, (unsigned int*)(addr+12)); + err |= get_user(li2, (unsigned int*)(addr+16)); + err |= get_user(addis2, (unsigned int*)(addr+20)); + err |= get_user(mtctr, (unsigned int*)(addr+24)); + err |= get_user(li3, (unsigned int*)(addr+28)); + err |= get_user(addis3, (unsigned int*)(addr+32)); + err |= get_user(bctr, (unsigned int*)(addr+36)); + + if (err) + break; + + if ((addis & 0xFFFF0000U) == 0x3D6B0000U && + (addi & 0xFFFF0000U) == 0x396B0000U && + rlwinm == 0x556C083CU && + add == 0x7D6C5A14U && + (li2 & 0xFFFF0000U) == 0x39800000U && + (addis2 & 0xFFFF0000U) == 0x3D8C0000U && + mtctr == 0x7D8903A6U && + (li3 & 0xFFFF0000U) == 0x39800000U && + (addis3 & 0xFFFF0000U) == 0x3D8C0000U && + bctr == 0x4E800420U) + { + regs->gpr[PT_R11] = + regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; + regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->ctr += (addis2 & 0xFFFFU) << 16; + regs->nip = regs->ctr; + return 4; + } + } + } while (0); +#endif + + do { /* PaX: unpatched PLT emulation #3 */ + unsigned int li, b; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(b, (unsigned int *)(regs->nip+4)); + + if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { + unsigned int addis, lwz, mtctr, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(addis, (unsigned int*)addr); + err |= get_user(lwz, (unsigned int*)(addr+4)); + err |= get_user(mtctr, (unsigned int*)(addr+8)); + err |= get_user(bctr, (unsigned int*)(addr+12)); + + if (err) + break; + + if ((addis & 0xFFFF0000U) == 0x3D6B0000U && + (lwz & 0xFFFF0000U) == 0x816B0000U && + mtctr == 0x7D6903A6U && + bctr == 0x4E800420U) + { + unsigned int r11; + + addr = (addis << 16) + (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + addr += (((lwz | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + + err = get_user(r11, (unsigned int*)addr); + if (err) + break; + + regs->gpr[PT_R11] = r11; + regs->ctr = r11; + regs->nip = r11; + return 4; + } + } + } while (0); +#endif + +#ifdef CONFIG_PAX_EMUSIGRT + do { /* PaX: sigreturn emulation */ + unsigned int li, sc; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(sc, (unsigned int *)(regs->nip+4)); + + if (!err && li == 0x38000000U + __NR_sigreturn && sc == 0x44000002U) { + struct vm_area_struct *vma; + unsigned long call_syscall; + + down_read(¤t->mm->mmap_sem); + call_syscall = current->mm->call_syscall; + up_read(¤t->mm->mmap_sem); + if (likely(call_syscall)) + goto emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(¤t->mm->mmap_sem); + if (current->mm->call_syscall) { + call_syscall = current->mm->call_syscall; + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_syscall & ~PAGE_MASK)) { + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_syscall)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_syscall = call_syscall; + up_write(¤t->mm->mmap_sem); + +emulate: + regs->gpr[PT_R0] = __NR_sigreturn; + regs->nip = call_syscall; + return 5; + } + } while (0); + + do { /* PaX: rt_sigreturn emulation */ + unsigned int li, sc; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(sc, (unsigned int *)(regs->nip+4)); + + if (!err && li == 0x38000000U + __NR_rt_sigreturn && sc == 0x44000002U) { + struct vm_area_struct *vma; + unsigned int call_syscall; + + down_read(¤t->mm->mmap_sem); + call_syscall = current->mm->call_syscall; + up_read(¤t->mm->mmap_sem); + if (likely(call_syscall)) + goto rt_emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(¤t->mm->mmap_sem); + if (current->mm->call_syscall) { + call_syscall = current->mm->call_syscall; + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto rt_emulate; + } + + call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_syscall & ~PAGE_MASK)) { + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_syscall)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_syscall = call_syscall; + up_write(¤t->mm->mmap_sem); + +rt_emulate: + regs->gpr[PT_R0] = __NR_rt_sigreturn; + regs->nip = call_syscall; + return 6; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * Check whether the instruction at regs->nip is a store using * an update addressing form which will update r1. @@ -168,7 +532,7 @@ * indicate errors in DSISR but can validly be set in SRR1. */ if (trap == 0x400) - error_code &= 0x48200000; + error_code &= 0x58200000; else is_write = error_code & DSISR_ISSTORE; #else @@ -295,9 +659,9 @@ /* protection fault */ if (error_code & DSISR_PROTFAULT) goto bad_area; +#endif if (!(vma->vm_flags & VM_EXEC)) goto bad_area; -#endif #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) pte_t *ptep; pmd_t *pmdp; @@ -368,6 +732,37 @@ bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { +#ifdef CONFIG_PPC64 + if (is_exec && (error_code & DSISR_PROTFAULT)) { +#else + if (is_exec && regs->nip == address) { +#endif + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + case 4: + return 0; +#endif + +#ifdef CONFIG_PAX_EMUSIGRT + case 5: + case 6: + return 0; +#endif + + } + + pax_report_fault(regs, (void*)regs->nip, (void*)regs->gpr[PT_R1]); + do_exit(SIGKILL); + } + } +#endif + _exception(SIGSEGV, regs, code, address); return 0; } diff -Naur linux-2.6.19.2.orig/arch/powerpc/mm/hugetlbpage.c linux-2.6.19.2/arch/powerpc/mm/hugetlbpage.c --- linux-2.6.19.2.orig/arch/powerpc/mm/hugetlbpage.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/powerpc/mm/hugetlbpage.c 2007-01-12 23:27:22.000000000 +0000 @@ -563,6 +563,10 @@ if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); @@ -574,7 +578,7 @@ if (len > mm->cached_hole_size) { start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -607,8 +611,8 @@ } /* Make sure we didn't miss any holes */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } @@ -643,6 +647,11 @@ mm->free_area_cache = base; /* requesting a specific address */ + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); @@ -722,12 +731,20 @@ * can happen with large stack limits and large mmap() * allocations. */ - mm->free_area_cache = TASK_UNMAPPED_BASE; + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; mm->cached_hole_size = ~0UL; addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); /* * Restore the topdown base: */ + mm->mmap_base = base; mm->free_area_cache = base; mm->cached_hole_size = ~0UL; diff -Naur linux-2.6.19.2.orig/arch/powerpc/mm/mmap.c linux-2.6.19.2/arch/powerpc/mm/mmap.c --- linux-2.6.19.2.orig/arch/powerpc/mm/mmap.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/powerpc/mm/mmap.c 2007-01-12 23:27:22.000000000 +0000 @@ -74,10 +74,22 @@ */ if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff -Naur linux-2.6.19.2.orig/arch/ppc/mm/fault.c linux-2.6.19.2/arch/ppc/mm/fault.c --- linux-2.6.19.2.orig/arch/ppc/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/ppc/mm/fault.c 2007-01-12 23:27:22.000000000 +0000 @@ -25,6 +25,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -48,6 +53,379 @@ unsigned long pte_errors; /* updated by do_page_fault() */ unsigned int probingmem; +#ifdef CONFIG_PAX_EMUSIGRT +void pax_syscall_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_syscall = 0UL; +} + +static struct page* pax_syscall_nopage(struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page* page; + unsigned int *kaddr; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return NOPAGE_OOM; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x44000002U; /* sc */ + __flush_dcache_icache(kaddr); + kunmap(page); + if (type) + *type = VM_FAULT_MAJOR; + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + .close = pax_syscall_close, + .nopage = pax_syscall_nopage, +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = protection_map[vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->nip = fault address) + * + * returns 1 when task should be killed + * 2 when patched GOT trampoline was detected + * 3 when patched PLT trampoline was detected + * 4 when unpatched PLT trampoline was detected + * 5 when sigreturn trampoline was detected + * 6 when rt_sigreturn trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#if defined(CONFIG_PAX_EMUPLT) || defined(CONFIG_PAX_EMUSIGRT) + int err; +#endif + +#ifdef CONFIG_PAX_EMUPLT + do { /* PaX: patched GOT emulation */ + unsigned int blrl; + + err = get_user(blrl, (unsigned int*)regs->nip); + + if (!err && blrl == 0x4E800021U) { + unsigned long temp = regs->nip; + + regs->nip = regs->link & 0xFFFFFFFCUL; + regs->link = temp + 4UL; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #1 */ + unsigned int b; + + err = get_user(b, (unsigned int *)regs->nip); + + if (!err && (b & 0xFC000003U) == 0x48000000U) { + regs->nip += (((b | 0xFC000000UL) ^ 0x02000000UL) + 0x02000000UL); + return 3; + } + } while (0); + + do { /* PaX: unpatched PLT emulation #1 */ + unsigned int li, b; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(b, (unsigned int *)(regs->nip+4)); + + if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { + unsigned int rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(rlwinm, (unsigned int*)addr); + err |= get_user(add, (unsigned int*)(addr+4)); + err |= get_user(li2, (unsigned int*)(addr+8)); + err |= get_user(addis2, (unsigned int*)(addr+12)); + err |= get_user(mtctr, (unsigned int*)(addr+16)); + err |= get_user(li3, (unsigned int*)(addr+20)); + err |= get_user(addis3, (unsigned int*)(addr+24)); + err |= get_user(bctr, (unsigned int*)(addr+28)); + + if (err) + break; + + if (rlwinm == 0x556C083CU && + add == 0x7D6C5A14U && + (li2 & 0xFFFF0000U) == 0x39800000U && + (addis2 & 0xFFFF0000U) == 0x3D8C0000U && + mtctr == 0x7D8903A6U && + (li3 & 0xFFFF0000U) == 0x39800000U && + (addis3 & 0xFFFF0000U) == 0x3D8C0000U && + bctr == 0x4E800420U) + { + regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; + regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->ctr += (addis2 & 0xFFFFU) << 16; + regs->nip = regs->ctr; + return 4; + } + } + } while (0); + +#if 0 + do { /* PaX: unpatched PLT emulation #2 */ + unsigned int lis, lwzu, b, bctr; + + err = get_user(lis, (unsigned int *)regs->nip); + err |= get_user(lwzu, (unsigned int *)(regs->nip+4)); + err |= get_user(b, (unsigned int *)(regs->nip+8)); + err |= get_user(bctr, (unsigned int *)(regs->nip+12)); + + if (err) + break; + + if ((lis & 0xFFFF0000U) == 0x39600000U && + (lwzu & 0xU) == 0xU && + (b & 0xFC000003U) == 0x48000000U && + bctr == 0x4E800420U) + { + unsigned int addis, addi, rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 12 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(addis, (unsigned int*)addr); + err |= get_user(addi, (unsigned int*)(addr+4)); + err |= get_user(rlwinm, (unsigned int*)(addr+8)); + err |= get_user(add, (unsigned int*)(addr+12)); + err |= get_user(li2, (unsigned int*)(addr+16)); + err |= get_user(addis2, (unsigned int*)(addr+20)); + err |= get_user(mtctr, (unsigned int*)(addr+24)); + err |= get_user(li3, (unsigned int*)(addr+28)); + err |= get_user(addis3, (unsigned int*)(addr+32)); + err |= get_user(bctr, (unsigned int*)(addr+36)); + + if (err) + break; + + if ((addis & 0xFFFF0000U) == 0x3D6B0000U && + (addi & 0xFFFF0000U) == 0x396B0000U && + rlwinm == 0x556C083CU && + add == 0x7D6C5A14U && + (li2 & 0xFFFF0000U) == 0x39800000U && + (addis2 & 0xFFFF0000U) == 0x3D8C0000U && + mtctr == 0x7D8903A6U && + (li3 & 0xFFFF0000U) == 0x39800000U && + (addis3 & 0xFFFF0000U) == 0x3D8C0000U && + bctr == 0x4E800420U) + { + regs->gpr[PT_R11] = + regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; + regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->ctr += (addis2 & 0xFFFFU) << 16; + regs->nip = regs->ctr; + return 4; + } + } + } while (0); +#endif + + do { /* PaX: unpatched PLT emulation #3 */ + unsigned int li, b; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(b, (unsigned int *)(regs->nip+4)); + + if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { + unsigned int addis, lwz, mtctr, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(addis, (unsigned int*)addr); + err |= get_user(lwz, (unsigned int*)(addr+4)); + err |= get_user(mtctr, (unsigned int*)(addr+8)); + err |= get_user(bctr, (unsigned int*)(addr+12)); + + if (err) + break; + + if ((addis & 0xFFFF0000U) == 0x3D6B0000U && + (lwz & 0xFFFF0000U) == 0x816B0000U && + mtctr == 0x7D6903A6U && + bctr == 0x4E800420U) + { + unsigned int r11; + + addr = (addis << 16) + (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + addr += (((lwz | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + + err = get_user(r11, (unsigned int*)addr); + if (err) + break; + + regs->gpr[PT_R11] = r11; + regs->ctr = r11; + regs->nip = r11; + return 4; + } + } + } while (0); +#endif + +#ifdef CONFIG_PAX_EMUSIGRT + do { /* PaX: sigreturn emulation */ + unsigned int li, sc; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(sc, (unsigned int *)(regs->nip+4)); + + if (!err && li == 0x38000000U + __NR_sigreturn && sc == 0x44000002U) { + struct vm_area_struct *vma; + unsigned long call_syscall; + + down_read(¤t->mm->mmap_sem); + call_syscall = current->mm->call_syscall; + up_read(¤t->mm->mmap_sem); + if (likely(call_syscall)) + goto emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(¤t->mm->mmap_sem); + if (current->mm->call_syscall) { + call_syscall = current->mm->call_syscall; + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_syscall & ~PAGE_MASK)) { + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_syscall)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_syscall = call_syscall; + up_write(¤t->mm->mmap_sem); + +emulate: + regs->gpr[PT_R0] = __NR_sigreturn; + regs->nip = call_syscall; + return 5; + } + } while (0); + + do { /* PaX: rt_sigreturn emulation */ + unsigned int li, sc; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(sc, (unsigned int *)(regs->nip+4)); + + if (!err && li == 0x38000000U + __NR_rt_sigreturn && sc == 0x44000002U) { + struct vm_area_struct *vma; + unsigned int call_syscall; + + down_read(¤t->mm->mmap_sem); + call_syscall = current->mm->call_syscall; + up_read(¤t->mm->mmap_sem); + if (likely(call_syscall)) + goto rt_emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(¤t->mm->mmap_sem); + if (current->mm->call_syscall) { + call_syscall = current->mm->call_syscall; + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto rt_emulate; + } + + call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_syscall & ~PAGE_MASK)) { + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_syscall)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_syscall = call_syscall; + up_write(¤t->mm->mmap_sem); + +rt_emulate: + regs->gpr[PT_R0] = __NR_rt_sigreturn; + regs->nip = call_syscall; + return 6; + } + } while (0); +#endif + + return 1; +} + +/* + * PaX: decide what to do with offenders (regs->nip = fault address) + * + * returns 1 when task should be killed + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#if defined(CONFIG_PAX_EMUPLT) || defined(CONFIG_PAX_EMUSIGRT) + int err; +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * Check whether the instruction at regs->nip is a store using * an update addressing form which will update r1. @@ -108,7 +486,7 @@ * indicate errors in DSISR but can validly be set in SRR1. */ if (TRAP(regs) == 0x400) - error_code &= 0x48200000; + error_code &= 0x58200000; else is_write = error_code & 0x02000000; #endif /* CONFIG_4xx || CONFIG_BOOKE */ @@ -203,15 +581,14 @@ pte_t *ptep; pmd_t *pmdp; -#if 0 +#if 1 /* It would be nice to actually enforce the VM execute permission on CPUs which can do so, but far too much stuff in userspace doesn't get the permissions right, so we let any page be executed for now. */ if (! (vma->vm_flags & VM_EXEC)) goto bad_area; -#endif - +#else /* Since 4xx/Book-E supports per-page execute permission, * we lazily flush dcache to icache. */ ptep = NULL; @@ -234,6 +611,7 @@ pte_unmap_unlock(ptep, ptl); } #endif +#endif /* a read */ } else { /* protection fault */ @@ -279,6 +657,33 @@ /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { + if ((TRAP(regs) == 0x400) && (regs->nip == address)) { + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + case 4: + return 0; +#endif + +#ifdef CONFIG_PAX_EMUSIGRT + case 5: + case 6: + return 0; +#endif + + } + + pax_report_fault(regs, (void*)regs->nip, (void*)regs->gpr[1]); + do_exit(SIGKILL); + } + } +#endif + _exception(SIGSEGV, regs, code, address); return 0; } diff -Naur linux-2.6.19.2.orig/arch/s390/kernel/module.c linux-2.6.19.2/arch/s390/kernel/module.c --- linux-2.6.19.2.orig/arch/s390/kernel/module.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/s390/kernel/module.c 2007-01-12 23:27:23.000000000 +0000 @@ -164,11 +164,11 @@ /* Increase core size by size of got & plt and set start offsets for got and plt. */ - me->core_size = ALIGN(me->core_size, 4); - me->arch.got_offset = me->core_size; - me->core_size += me->arch.got_size; - me->arch.plt_offset = me->core_size; - me->core_size += me->arch.plt_size; + me->core_size_rw = ALIGN(me->core_size_rw, 4); + me->arch.got_offset = me->core_size_rw; + me->core_size_rw += me->arch.got_size; + me->arch.plt_offset = me->core_size_rx; + me->core_size_rx += me->arch.plt_size; return 0; } @@ -254,7 +254,7 @@ if (info->got_initialized == 0) { Elf_Addr *gotent; - gotent = me->module_core + me->arch.got_offset + + gotent = me->module_core_rw + me->arch.got_offset + info->got_offset; *gotent = val; info->got_initialized = 1; @@ -278,7 +278,7 @@ else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) *(unsigned int *) loc = - (val + (Elf_Addr) me->module_core - loc) >> 1; + (val + (Elf_Addr) me->module_core_rw - loc) >> 1; else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) *(unsigned long *) loc = val; @@ -292,7 +292,7 @@ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { unsigned int *ip; - ip = me->module_core + me->arch.plt_offset + + ip = me->module_core_rx + me->arch.plt_offset + info->plt_offset; #ifndef CONFIG_64BIT ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */ @@ -314,7 +314,7 @@ val = me->arch.plt_offset - me->arch.got_offset + info->plt_offset + rela->r_addend; else - val = (Elf_Addr) me->module_core + + val = (Elf_Addr) me->module_core_rx + me->arch.plt_offset + info->plt_offset + rela->r_addend - loc; if (r_type == R_390_PLT16DBL) @@ -334,7 +334,7 @@ case R_390_GOTOFF32: /* 32 bit offset to GOT. */ case R_390_GOTOFF64: /* 64 bit offset to GOT. */ val = val + rela->r_addend - - ((Elf_Addr) me->module_core + me->arch.got_offset); + ((Elf_Addr) me->module_core_rw + me->arch.got_offset); if (r_type == R_390_GOTOFF16) *(unsigned short *) loc = val; else if (r_type == R_390_GOTOFF32) @@ -344,7 +344,7 @@ break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ - val = (Elf_Addr) me->module_core + me->arch.got_offset + + val = (Elf_Addr) me->module_core_rw + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) *(unsigned int *) loc = val; diff -Naur linux-2.6.19.2.orig/arch/sparc/Makefile linux-2.6.19.2/arch/sparc/Makefile --- linux-2.6.19.2.orig/arch/sparc/Makefile 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/sparc/Makefile 2007-01-12 23:27:23.000000000 +0000 @@ -36,7 +36,7 @@ # Renaming is done to avoid confusing pattern matching rules in 2.5.45 (multy-) INIT_Y := $(patsubst %/, %/built-in.o, $(init-y)) CORE_Y := $(core-y) -CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ CORE_Y := $(patsubst %/, %/built-in.o, $(CORE_Y)) DRIVERS_Y := $(patsubst %/, %/built-in.o, $(drivers-y)) NET_Y := $(patsubst %/, %/built-in.o, $(net-y)) diff -Naur linux-2.6.19.2.orig/arch/sparc/kernel/ptrace.c linux-2.6.19.2/arch/sparc/kernel/ptrace.c --- linux-2.6.19.2.orig/arch/sparc/kernel/ptrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/sparc/kernel/ptrace.c 2007-01-12 23:27:23.000000000 +0000 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -300,6 +301,11 @@ goto out; } + if (gr_handle_ptrace(child, request)) { + pt_error_return(regs, EPERM); + goto out_tsk; + } + if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { if (ptrace_attach(child)) { diff -Naur linux-2.6.19.2.orig/arch/sparc/kernel/sys_sparc.c linux-2.6.19.2/arch/sparc/kernel/sys_sparc.c --- linux-2.6.19.2.orig/arch/sparc/kernel/sys_sparc.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/sparc/kernel/sys_sparc.c 2007-01-12 23:27:23.000000000 +0000 @@ -57,7 +57,7 @@ if (ARCH_SUN4C_SUN4 && len > 0x20000000) return -ENOMEM; if (!addr) - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (flags & MAP_SHARED) addr = COLOUR_ALIGN(addr); diff -Naur linux-2.6.19.2.orig/arch/sparc/mm/fault.c linux-2.6.19.2/arch/sparc/mm/fault.c --- linux-2.6.19.2.orig/arch/sparc/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/sparc/mm/fault.c 2007-01-12 23:27:23.000000000 +0000 @@ -21,6 +21,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -217,6 +221,252 @@ return safe_compute_effective_address(regs, insn); } +#ifdef CONFIG_PAX_PAGEEXEC +void pax_emuplt_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static struct page* pax_emuplt_nopage(struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page* page; + unsigned int *kaddr; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return NOPAGE_OOM; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x9DE3BFA8U; /* save */ + flush_dcache_page(page); + kunmap(page); + if (type) + *type = VM_FAULT_MAJOR; + + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + .close = pax_emuplt_close, + .nopage = pax_emuplt_nopage, +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = protection_map[vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} + +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned int*)regs->pc); + err |= get_user(sethi2, (unsigned int*)(regs->pc+4)); + err |= get_user(jmpl, (unsigned int*)(regs->pc+8)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U) + { + unsigned int addr; + + regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; + addr = regs->u_regs[UREG_G1]; + addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + { /* PaX: patched PLT emulation #2 */ + unsigned int ba; + + err = get_user(ba, (unsigned int*)regs->pc); + + if (!err && (ba & 0xFFC00000U) == 0x30800000U) { + unsigned int addr; + + addr = regs->pc + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } + + do { /* PaX: patched PLT emulation #3 */ + unsigned int sethi, jmpl, nop; + + err = get_user(sethi, (unsigned int*)regs->pc); + err |= get_user(jmpl, (unsigned int*)(regs->pc+4)); + err |= get_user(nop, (unsigned int*)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U && + nop == 0x01000000U) + { + unsigned int addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 1 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int*)regs->pc); + err |= get_user(ba, (unsigned int*)(regs->pc+4)); + err |= get_user(nop, (unsigned int*)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && + nop == 0x01000000U) + { + unsigned int addr, save, call; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->pc + 4 + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + else + addr = regs->pc + 4 + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2); + + err = get_user(save, (unsigned int*)addr); + err |= get_user(call, (unsigned int*)(addr+4)); + err |= get_user(nop, (unsigned int*)(addr+8)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + struct vm_area_struct *vma; + unsigned long call_dl_resolve; + + down_read(¤t->mm->mmap_sem); + call_dl_resolve = current->mm->call_dl_resolve; + up_read(¤t->mm->mmap_sem); + if (likely(call_dl_resolve)) + goto emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(¤t->mm->mmap_sem); + if (current->mm->call_dl_resolve) { + call_dl_resolve = current->mm->call_dl_resolve; + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_dl_resolve & ~PAGE_MASK)) { + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_dl_resolve)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_dl_resolve = call_dl_resolve; + up_write(¤t->mm->mmap_sem); + +emulate: + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->pc = call_dl_resolve; + regs->npc = addr+4; + return 3; + } + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 2 */ + unsigned int save, call, nop; + + err = get_user(save, (unsigned int*)(regs->pc-4)); + err |= get_user(call, (unsigned int*)regs->pc); + err |= get_user(nop, (unsigned int*)(regs->pc+4)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + unsigned int dl_resolve = regs->pc + ((((call | 0xC0000000U) ^ 0x20000000U) + 0x20000000U) << 2); + + regs->u_regs[UREG_RETPC] = regs->pc; + regs->pc = dl_resolve; + regs->npc = dl_resolve+4; + return 3; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long address) { @@ -280,6 +530,24 @@ if(!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && text_fault && !(vma->vm_flags & VM_EXEC)) { + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->pc, (void*)regs->u_regs[UREG_FP]); + do_exit(SIGKILL); + } +#endif + /* Allow reads even for write-only mappings */ if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; diff -Naur linux-2.6.19.2.orig/arch/sparc/mm/init.c linux-2.6.19.2/arch/sparc/mm/init.c --- linux-2.6.19.2.orig/arch/sparc/mm/init.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/sparc/mm/init.c 2007-01-12 23:27:23.000000000 +0000 @@ -333,17 +333,17 @@ /* Initialize the protection map with non-constant, MMU dependent values. */ protection_map[0] = PAGE_NONE; - protection_map[1] = PAGE_READONLY; - protection_map[2] = PAGE_COPY; - protection_map[3] = PAGE_COPY; + protection_map[1] = PAGE_READONLY_NOEXEC; + protection_map[2] = PAGE_COPY_NOEXEC; + protection_map[3] = PAGE_COPY_NOEXEC; protection_map[4] = PAGE_READONLY; protection_map[5] = PAGE_READONLY; protection_map[6] = PAGE_COPY; protection_map[7] = PAGE_COPY; protection_map[8] = PAGE_NONE; - protection_map[9] = PAGE_READONLY; - protection_map[10] = PAGE_SHARED; - protection_map[11] = PAGE_SHARED; + protection_map[9] = PAGE_READONLY_NOEXEC; + protection_map[10] = PAGE_SHARED_NOEXEC; + protection_map[11] = PAGE_SHARED_NOEXEC; protection_map[12] = PAGE_READONLY; protection_map[13] = PAGE_READONLY; protection_map[14] = PAGE_SHARED; diff -Naur linux-2.6.19.2.orig/arch/sparc/mm/srmmu.c linux-2.6.19.2/arch/sparc/mm/srmmu.c --- linux-2.6.19.2.orig/arch/sparc/mm/srmmu.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/sparc/mm/srmmu.c 2007-01-12 23:27:23.000000000 +0000 @@ -2160,6 +2160,13 @@ BTFIXUPSET_INT(page_shared, pgprot_val(SRMMU_PAGE_SHARED)); BTFIXUPSET_INT(page_copy, pgprot_val(SRMMU_PAGE_COPY)); BTFIXUPSET_INT(page_readonly, pgprot_val(SRMMU_PAGE_RDONLY)); + +#ifdef CONFIG_PAX_PAGEEXEC + BTFIXUPSET_INT(page_shared_noexec, pgprot_val(SRMMU_PAGE_SHARED_NOEXEC)); + BTFIXUPSET_INT(page_copy_noexec, pgprot_val(SRMMU_PAGE_COPY_NOEXEC)); + BTFIXUPSET_INT(page_readonly_noexec, pgprot_val(SRMMU_PAGE_RDONLY_NOEXEC)); +#endif + BTFIXUPSET_INT(page_kernel, pgprot_val(SRMMU_PAGE_KERNEL)); page_kernel = pgprot_val(SRMMU_PAGE_KERNEL); diff -Naur linux-2.6.19.2.orig/arch/sparc64/kernel/ptrace.c linux-2.6.19.2/arch/sparc64/kernel/ptrace.c --- linux-2.6.19.2.orig/arch/sparc64/kernel/ptrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/sparc64/kernel/ptrace.c 2007-01-12 23:27:23.000000000 +0000 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -213,6 +214,11 @@ goto out; } + if (gr_handle_ptrace(child, (long)request)) { + pt_error_return(regs, EPERM); + goto out_tsk; + } + if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { if (ptrace_attach(child)) { diff -Naur linux-2.6.19.2.orig/arch/sparc64/kernel/sys_sparc.c linux-2.6.19.2/arch/sparc64/kernel/sys_sparc.c --- linux-2.6.19.2.orig/arch/sparc64/kernel/sys_sparc.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/sparc64/kernel/sys_sparc.c 2007-01-12 23:27:23.000000000 +0000 @@ -140,6 +140,10 @@ if (filp || (flags & MAP_SHARED)) do_color_align = 1; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -153,9 +157,9 @@ } if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; + start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -175,8 +179,8 @@ vma = find_vma(mm, VA_EXCLUDE_END); } if (unlikely(task_size < addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } @@ -379,6 +383,12 @@ current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY || sysctl_legacy_va_layout) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { @@ -393,6 +403,12 @@ gap = (task_size / 6 * 5); mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff -Naur linux-2.6.19.2.orig/arch/sparc64/mm/fault.c linux-2.6.19.2/arch/sparc64/mm/fault.c --- linux-2.6.19.2.orig/arch/sparc64/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/sparc64/mm/fault.c 2007-01-12 23:27:23.000000000 +0000 @@ -20,6 +20,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -290,6 +294,369 @@ unhandled_fault (address, current, regs); } +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_EMUPLT +static void pax_emuplt_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static struct page* pax_emuplt_nopage(struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page* page; + unsigned int *kaddr; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return NOPAGE_OOM; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x9DE3BFA8U; /* save */ + flush_dcache_page(page); + kunmap(page); + if (type) + *type = VM_FAULT_MAJOR; + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + .close = pax_emuplt_close, + .nopage = pax_emuplt_nopage, +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = protection_map[vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} +#endif + +/* + * PaX: decide what to do with offenders (regs->tpc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned int*)regs->tpc); + err |= get_user(sethi2, (unsigned int*)(regs->tpc+4)); + err |= get_user(jmpl, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; + addr = regs->u_regs[UREG_G1]; + addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + { /* PaX: patched PLT emulation #2 */ + unsigned int ba; + + err = get_user(ba, (unsigned int*)regs->tpc); + + if (!err && (ba & 0xFFC00000U) == 0x30800000U) { + unsigned long addr; + + addr = regs->tpc + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } + + do { /* PaX: patched PLT emulation #3 */ + unsigned int sethi, jmpl, nop; + + err = get_user(sethi, (unsigned int*)regs->tpc); + err |= get_user(jmpl, (unsigned int*)(regs->tpc+4)); + err |= get_user(nop, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U && + nop == 0x01000000U) + { + unsigned long addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #4 */ + unsigned int mov1, call, mov2; + + err = get_user(mov1, (unsigned int*)regs->tpc); + err |= get_user(call, (unsigned int*)(regs->tpc+4)); + err |= get_user(mov2, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if (mov1 == 0x8210000FU && + (call & 0xC0000000U) == 0x40000000U && + mov2 == 0x9E100001U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = regs->u_regs[UREG_RETPC]; + addr = regs->tpc + 4 + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #5 */ + unsigned int sethi1, sethi2, or1, or2, sllx, jmpl, nop; + + err = get_user(sethi1, (unsigned int*)regs->tpc); + err |= get_user(sethi2, (unsigned int*)(regs->tpc+4)); + err |= get_user(or1, (unsigned int*)(regs->tpc+8)); + err |= get_user(or2, (unsigned int*)(regs->tpc+12)); + err |= get_user(sllx, (unsigned int*)(regs->tpc+16)); + err |= get_user(jmpl, (unsigned int*)(regs->tpc+20)); + err |= get_user(nop, (unsigned int*)(regs->tpc+24)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + (or1 & 0xFFFFE000U) == 0x82106000U && + (or2 & 0xFFFFE000U) == 0x8A116000U && + sllx == 0x83287020 && + jmpl == 0x81C04005U && + nop == 0x01000000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU); + regs->u_regs[UREG_G1] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU); + addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #6 */ + unsigned int sethi1, sethi2, sllx, or, jmpl, nop; + + err = get_user(sethi1, (unsigned int*)regs->tpc); + err |= get_user(sethi2, (unsigned int*)(regs->tpc+4)); + err |= get_user(sllx, (unsigned int*)(regs->tpc+8)); + err |= get_user(or, (unsigned int*)(regs->tpc+12)); + err |= get_user(jmpl, (unsigned int*)(regs->tpc+16)); + err |= get_user(nop, (unsigned int*)(regs->tpc+20)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + sllx == 0x83287020 && + (or & 0xFFFFE000U) == 0x8A116000U && + jmpl == 0x81C04005U && + nop == 0x01000000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = (sethi1 & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or & 0x3FFU); + addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #7 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int*)regs->tpc); + err |= get_user(ba, (unsigned int*)(regs->tpc+4)); + err |= get_user(nop, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (ba & 0xFFF00000U) == 0x30600000U && + nop == 0x01000000U) + { + unsigned long addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr = regs->tpc + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 1 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int*)regs->tpc); + err |= get_user(ba, (unsigned int*)(regs->tpc+4)); + err |= get_user(nop, (unsigned int*)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && + nop == 0x01000000U) + { + unsigned long addr; + unsigned int save, call; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); + else + addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + + err = get_user(save, (unsigned int*)addr); + err |= get_user(call, (unsigned int*)(addr+4)); + err |= get_user(nop, (unsigned int*)(addr+8)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + struct vm_area_struct *vma; + unsigned long call_dl_resolve; + + down_read(¤t->mm->mmap_sem); + call_dl_resolve = current->mm->call_dl_resolve; + up_read(¤t->mm->mmap_sem); + if (likely(call_dl_resolve)) + goto emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(¤t->mm->mmap_sem); + if (current->mm->call_dl_resolve) { + call_dl_resolve = current->mm->call_dl_resolve; + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_dl_resolve & ~PAGE_MASK)) { + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_dl_resolve)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_dl_resolve = call_dl_resolve; + up_write(¤t->mm->mmap_sem); + +emulate: + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->tpc = call_dl_resolve; + regs->tnpc = addr+4; + return 3; + } + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 2 */ + unsigned int save, call, nop; + + err = get_user(save, (unsigned int*)(regs->tpc-4)); + err |= get_user(call, (unsigned int*)regs->tpc); + err |= get_user(nop, (unsigned int*)(regs->tpc+4)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + unsigned long dl_resolve = regs->tpc + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); + + regs->u_regs[UREG_RETPC] = regs->tpc; + regs->tpc = dl_resolve; + regs->tnpc = dl_resolve+4; + return 3; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif + asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { struct mm_struct *mm = current->mm; @@ -332,8 +699,10 @@ goto intr_or_no_mm; if (test_thread_flag(TIF_32BIT)) { - if (!(regs->tstate & TSTATE_PRIV)) + if (!(regs->tstate & TSTATE_PRIV)) { regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } address &= 0xffffffff; } @@ -350,6 +719,29 @@ if (!vma) goto bad_area; +#ifdef CONFIG_PAX_PAGEEXEC + /* PaX: detect ITLB misses on non-exec pages */ + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && vma->vm_start <= address && + !(vma->vm_flags & VM_EXEC) && (fault_code & FAULT_CODE_ITLB)) + { + if (address != regs->tpc) + goto good_area; + + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->tpc, (void*)(regs->u_regs[UREG_FP] + STACK_BIAS)); + do_exit(SIGKILL); + } +#endif + /* Pure DTLB misses do not tell us whether the fault causing * load/store/atomic was a write or not, it only says that there * was no match. So in such a case we (carefully) read the diff -Naur linux-2.6.19.2.orig/arch/v850/kernel/module.c linux-2.6.19.2/arch/v850/kernel/module.c --- linux-2.6.19.2.orig/arch/v850/kernel/module.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/v850/kernel/module.c 2007-01-12 23:27:23.000000000 +0000 @@ -150,8 +150,8 @@ tramp[1] = ((val >> 16) & 0xffff) + 0x610000; /* ...; jmp r1 */ /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if (location >= mod->module_core_rx + && location < mod->module_core_rx + mod->core_size_rx) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; diff -Naur linux-2.6.19.2.orig/arch/x86_64/boot/compressed/head.S linux-2.6.19.2/arch/x86_64/boot/compressed/head.S --- linux-2.6.19.2.orig/arch/x86_64/boot/compressed/head.S 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/boot/compressed/head.S 2007-01-12 23:27:23.000000000 +0000 @@ -41,11 +41,13 @@ movl %eax,%gs lss stack_start,%esp + movl 0x000000,%ecx xorl %eax,%eax 1: incl %eax # check that A20 really IS enabled movl %eax,0x000000 # loop forever if it isn't cmpl %eax,0x100000 je 1b + movl %ecx,0x000000 /* * Initialize eflags. Some BIOS's leave bits like NT set. This would diff -Naur linux-2.6.19.2.orig/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.19.2/arch/x86_64/ia32/ia32_binfmt.c --- linux-2.6.19.2.orig/arch/x86_64/ia32/ia32_binfmt.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/ia32/ia32_binfmt.c 2007-01-12 23:27:23.000000000 +0000 @@ -190,6 +190,17 @@ //#include #include +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) 0x08048000UL + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) 16 +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) 16 +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) 16 +#endif + typedef struct user_i387_ia32_struct elf_fpregset_t; typedef struct user32_fxsr_struct elf_fpxregset_t; diff -Naur linux-2.6.19.2.orig/arch/x86_64/ia32/mmap32.c linux-2.6.19.2/arch/x86_64/ia32/mmap32.c --- linux-2.6.19.2.orig/arch/x86_64/ia32/mmap32.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/ia32/mmap32.c 2007-01-12 23:27:23.000000000 +0000 @@ -68,10 +68,22 @@ (current->personality & ADDR_COMPAT_LAYOUT) || current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(mm); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff -Naur linux-2.6.19.2.orig/arch/x86_64/ia32/syscall32.c linux-2.6.19.2/arch/x86_64/ia32/syscall32.c --- linux-2.6.19.2.orig/arch/x86_64/ia32/syscall32.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/ia32/syscall32.c 2007-01-12 23:27:23.000000000 +0000 @@ -49,16 +49,21 @@ struct mm_struct *mm = current->mm; int ret; - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); if (!vma) return -ENOMEM; - memset(vma, 0, sizeof(struct vm_area_struct)); /* Could randomize here */ vma->vm_start = VSYSCALL32_BASE; vma->vm_end = VSYSCALL32_END; /* MAYWRITE to allow gdb to COW and set breakpoints */ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + +#ifdef CONFIG_PAX_MPROTECT + if (mm->pax_flags & MF_PAX_MPROTECT) + vma->vm_flags &= ~VM_MAYWRITE; +#endif + vma->vm_flags |= mm->def_flags; vma->vm_page_prot = protection_map[vma->vm_flags & 7]; vma->vm_ops = &syscall32_vm_ops; diff -Naur linux-2.6.19.2.orig/arch/x86_64/kernel/ioport.c linux-2.6.19.2/arch/x86_64/kernel/ioport.c --- linux-2.6.19.2.orig/arch/x86_64/kernel/ioport.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/kernel/ioport.c 2007-01-12 23:27:23.000000000 +0000 @@ -41,8 +41,16 @@ if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; + +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on) { + gr_handle_ioperm(); + return -EPERM; + } +#else if (turn_on && !capable(CAP_SYS_RAWIO)) return -EPERM; +#endif /* * If it's the first ioperm() call in this thread's lifetime, set the @@ -111,8 +119,13 @@ return -EINVAL; /* Trying to gain more privileges? */ if (level > old) { +#ifdef CONFIG_GRKERNSEC_IO + gr_handle_iopl(); + return -EPERM; +#else if (!capable(CAP_SYS_RAWIO)) return -EPERM; +#endif } regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12); return 0; diff -Naur linux-2.6.19.2.orig/arch/x86_64/kernel/process.c linux-2.6.19.2/arch/x86_64/kernel/process.c --- linux-2.6.19.2.orig/arch/x86_64/kernel/process.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/kernel/process.c 2007-01-12 23:27:23.000000000 +0000 @@ -875,10 +875,3 @@ return 1; } - -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} diff -Naur linux-2.6.19.2.orig/arch/x86_64/kernel/ptrace.c linux-2.6.19.2/arch/x86_64/kernel/ptrace.c --- linux-2.6.19.2.orig/arch/x86_64/kernel/ptrace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/kernel/ptrace.c 2007-01-12 23:27:23.000000000 +0000 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff -Naur linux-2.6.19.2.orig/arch/x86_64/kernel/setup64.c linux-2.6.19.2/arch/x86_64/kernel/setup64.c --- linux-2.6.19.2.orig/arch/x86_64/kernel/setup64.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/kernel/setup64.c 2007-01-12 23:27:23.000000000 +0000 @@ -38,7 +38,6 @@ unsigned long __supported_pte_mask __read_mostly = ~0UL; EXPORT_SYMBOL(__supported_pte_mask); -static int do_not_nx __cpuinitdata = 0; /* noexec=on|off Control non executable mappings for 64bit processes. @@ -52,16 +51,14 @@ return -EINVAL; if (!strncmp(str, "on", 2)) { __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } return 0; } early_param("noexec", nonx_setup); -int force_personality32 = 0; +int force_personality32; /* noexec32=on|off Control non executable heap for 32bit processes. @@ -175,7 +172,7 @@ unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) { + if (!(efer & EFER_NX)) { __supported_pte_mask &= ~_PAGE_NX; } } diff -Naur linux-2.6.19.2.orig/arch/x86_64/kernel/signal.c linux-2.6.19.2/arch/x86_64/kernel/signal.c --- linux-2.6.19.2.orig/arch/x86_64/kernel/signal.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/kernel/signal.c 2007-01-12 23:27:23.000000000 +0000 @@ -254,8 +254,8 @@ err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); if (sizeof(*set) == 16) { - __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); - __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); + err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); + err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); } else err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); diff -Naur linux-2.6.19.2.orig/arch/x86_64/kernel/sys_x86_64.c linux-2.6.19.2/arch/x86_64/kernel/sys_x86_64.c --- linux-2.6.19.2.orig/arch/x86_64/kernel/sys_x86_64.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/kernel/sys_x86_64.c 2007-01-12 23:27:23.000000000 +0000 @@ -65,8 +65,8 @@ return error; } -static void find_start_end(unsigned long flags, unsigned long *begin, - unsigned long *end) +static void find_start_end(struct mm_struct *mm, unsigned long flags, + unsigned long *begin, unsigned long *end) { if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { /* This is usually used needed to map code in small @@ -79,7 +79,7 @@ *begin = 0x40000000; *end = 0x80000000; } else { - *begin = TASK_UNMAPPED_BASE; + *begin = mm->mmap_base; *end = TASK_SIZE; } } @@ -93,11 +93,15 @@ unsigned long start_addr; unsigned long begin, end; - find_start_end(flags, &begin, &end); + find_start_end(mm, flags, &begin, &end); if (len > end) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); diff -Naur linux-2.6.19.2.orig/arch/x86_64/mm/fault.c linux-2.6.19.2/arch/x86_64/mm/fault.c --- linux-2.6.19.2.orig/arch/x86_64/mm/fault.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/mm/fault.c 2007-01-12 23:27:23.000000000 +0000 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -322,6 +323,33 @@ return 0; } +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(void *pc, void *sp) +{ + long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char __user *)pc+i)) + printk("?? "); + else + printk("%02x ", c); + } + printk("\n"); + + printk(KERN_ERR "PAX: bytes at SP-8: "); + for (i = -1; i < 10; i++) { + unsigned long c; + if (get_user(c, (unsigned long __user *)sp+i)) + printk("???????????????? "); + else + printk("%016lx ", c); + } + printk("\n"); +} +#endif + int page_fault_trace = 0; int exception_trace = 1; @@ -453,6 +481,8 @@ good_area: info.si_code = SEGV_ACCERR; write = 0; + if ((error_code & PF_INSTR) && !(vma->vm_flags & VM_EXEC)) + goto bad_area; switch (error_code & (PF_PROT|PF_WRITE)) { default: /* 3: write, present */ /* fall through */ @@ -519,7 +549,14 @@ tsk->comm, tsk->pid, address, regs->rip, regs->rsp, error_code); } - + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm && (mm->pax_flags & MF_PAX_PAGEEXEC) && (error_code & 16)) { + pax_report_fault(regs, (void*)regs->rip, (void*)regs->rsp); + do_exit(SIGKILL); + } +#endif + tsk->thread.cr2 = address; /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); diff -Naur linux-2.6.19.2.orig/arch/x86_64/mm/mmap.c linux-2.6.19.2/arch/x86_64/mm/mmap.c --- linux-2.6.19.2.orig/arch/x86_64/mm/mmap.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/arch/x86_64/mm/mmap.c 2007-01-12 23:27:23.000000000 +0000 @@ -23,6 +23,12 @@ unsigned rnd = get_random_int() & 0xfffffff; mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT; } + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } diff -Naur linux-2.6.19.2.orig/drivers/acpi/glue.c linux-2.6.19.2/drivers/acpi/glue.c --- linux-2.6.19.2.orig/drivers/acpi/glue.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/acpi/glue.c 2007-01-12 23:27:23.000000000 +0000 @@ -16,7 +16,7 @@ #if ACPI_GLUE_DEBUG #define DBG(x...) printk(PREFIX x) #else -#define DBG(x...) +#define DBG(x...) do {} while (0) #endif static LIST_HEAD(bus_type_list); static DECLARE_RWSEM(bus_type_sem); diff -Naur linux-2.6.19.2.orig/drivers/acpi/processor_core.c linux-2.6.19.2/drivers/acpi/processor_core.c --- linux-2.6.19.2.orig/drivers/acpi/processor_core.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/acpi/processor_core.c 2007-01-12 23:27:23.000000000 +0000 @@ -534,7 +534,7 @@ return 0; } - BUG_ON((pr->id >= NR_CPUS) || (pr->id < 0)); + BUG_ON(pr->id >= NR_CPUS); /* * Buggy BIOS check diff -Naur linux-2.6.19.2.orig/drivers/char/agp/frontend.c linux-2.6.19.2/drivers/char/agp/frontend.c --- linux-2.6.19.2.orig/drivers/char/agp/frontend.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/char/agp/frontend.c 2007-01-12 23:27:23.000000000 +0000 @@ -818,7 +818,7 @@ if (copy_from_user(&reserve, arg, sizeof(struct agp_region))) return -EFAULT; - if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment)) + if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment_priv)) return -EFAULT; client = agp_find_client_by_pid(reserve.pid); diff -Naur linux-2.6.19.2.orig/drivers/char/keyboard.c linux-2.6.19.2/drivers/char/keyboard.c --- linux-2.6.19.2.orig/drivers/char/keyboard.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/char/keyboard.c 2007-01-12 23:27:23.000000000 +0000 @@ -203,7 +203,7 @@ if (scancode >= dev->keycodemax) return -EINVAL; - if (keycode < 0 || keycode > KEY_MAX) + if (keycode > KEY_MAX) return -EINVAL; if (dev->keycodesize < sizeof(keycode) && (keycode >> (dev->keycodesize * 8))) return -EINVAL; @@ -628,6 +628,16 @@ kbd->kbdmode == VC_MEDIUMRAW) && value != KVAL(K_SAK)) return; /* SAK is allowed even in raw mode */ + +#if defined(CONFIG_GRKERNSEC_PROC) || defined(CONFIG_GRKERNSEC_PROC_MEMMAP) + { + void *func = fn_handler[value]; + if (func == fn_show_state || func == fn_show_ptregs || + func == fn_show_mem) + return; + } +#endif + fn_handler[value](vc); } diff -Naur linux-2.6.19.2.orig/drivers/char/mem.c linux-2.6.19.2/drivers/char/mem.c --- linux-2.6.19.2.orig/drivers/char/mem.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/char/mem.c 2007-01-12 23:27:23.000000000 +0000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,10 @@ # include #endif +#ifdef CONFIG_GRKERNSEC +extern struct file_operations grsec_fops; +#endif + /* * Architectures vary in how they handle caching for addresses * outside of main memory. @@ -174,6 +179,11 @@ if (!valid_phys_addr_range(p, count)) return -EFAULT; +#ifdef CONFIG_GRKERNSEC_KMEM + gr_handle_mem_write(); + return -EPERM; +#endif + written = 0; #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED @@ -275,6 +285,11 @@ if (!private_mapping_ok(vma)) return -ENOSYS; +#ifdef CONFIG_GRKERNSEC_KMEM + if (gr_handle_mem_mmap(vma->vm_pgoff << PAGE_SHIFT, vma)) + return -EPERM; +#endif + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); @@ -506,6 +521,11 @@ ssize_t written; char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ +#ifdef CONFIG_GRKERNSEC_KMEM + gr_handle_kmem_write(); + return -EPERM; +#endif + if (p < (unsigned long) high_memory) { wrote = count; @@ -645,8 +665,24 @@ if (count > size) count = size; +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_flags & VM_MIRROR) { + unsigned long addr_m; + struct vm_area_struct * vma_m; + + addr_m = vma->vm_start + vma->vm_mirror; + vma_m = find_vma(mm, addr_m); + if (vma_m && vma_m->vm_start == addr_m && (vma_m->vm_flags & VM_MIRROR)) { + addr_m = addr + vma->vm_mirror; + zap_page_range(vma_m, addr_m, count, NULL); + } else + printk(KERN_ERR "PAX: VMMIRROR: read_zero bug, %08lx, %08lx\n", + addr, vma->vm_start); + } +#endif + zap_page_range(vma, addr, count, NULL); - if (zeromap_page_range(vma, addr, count, PAGE_COPY)) + if (zeromap_page_range(vma, addr, count, vma->vm_page_prot)) break; size -= count; @@ -799,6 +835,16 @@ static int open_port(struct inode * inode, struct file * filp) { +#ifdef CONFIG_GRKERNSEC_KMEM + gr_handle_open_port(); + return -EPERM; +#endif + + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static int open_mem(struct inode * inode, struct file * filp) +{ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } @@ -806,7 +852,6 @@ #define full_lseek null_lseek #define write_zero write_null #define read_full read_zero -#define open_mem open_port #define open_kmem open_mem #define open_oldmem open_mem @@ -939,6 +984,11 @@ filp->f_op = &oldmem_fops; break; #endif +#ifdef CONFIG_GRKERNSEC + case 13: + filp->f_op = &grsec_fops; + break; +#endif default: return -ENXIO; } @@ -971,6 +1021,9 @@ #ifdef CONFIG_CRASH_DUMP {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops}, #endif +#ifdef CONFIG_GRKERNSEC + {13,"grsec", S_IRUSR | S_IWUGO, &grsec_fops}, +#endif }; static struct class *mem_class; diff -Naur linux-2.6.19.2.orig/drivers/char/random.c linux-2.6.19.2/drivers/char/random.c --- linux-2.6.19.2.orig/drivers/char/random.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/char/random.c 2007-01-12 23:27:23.000000000 +0000 @@ -248,8 +248,13 @@ /* * Configuration information */ +#ifdef CONFIG_GRKERNSEC_RANDNET +#define INPUT_POOL_WORDS 512 +#define OUTPUT_POOL_WORDS 128 +#else #define INPUT_POOL_WORDS 128 #define OUTPUT_POOL_WORDS 32 +#endif #define SEC_XFER_SIZE 512 /* @@ -286,10 +291,17 @@ int poolwords; int tap1, tap2, tap3, tap4, tap5; } poolinfo_table[] = { +#ifdef CONFIG_GRKERNSEC_RANDNET + /* x^512 + x^411 + x^308 + x^208 +x^104 + x + 1 -- 225 */ + { 512, 411, 308, 208, 104, 1 }, + /* x^128 + x^103 + x^76 + x^51 + x^25 + x + 1 -- 105 */ + { 128, 103, 76, 51, 25, 1 }, +#else /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */ { 128, 103, 76, 51, 25, 1 }, /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */ { 32, 26, 20, 14, 7, 1 }, +#endif #if 0 /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */ { 2048, 1638, 1231, 819, 411, 1 }, @@ -1662,3 +1674,25 @@ return 0; return PAGE_ALIGN(get_random_int() % range + start); } + +#if defined(CONFIG_PAX_ASLR) || defined(CONFIG_GRKERNSEC) +unsigned long pax_get_random_long(void) +{ + static time_t rekey_time; + static __u32 secret[12]; + time_t t; + + /* + * Pick a random secret every REKEY_INTERVAL seconds. + */ + t = get_seconds(); + if (!rekey_time || (t - rekey_time) > REKEY_INTERVAL) { + rekey_time = t; + get_random_bytes(secret, sizeof(secret)); + } + + secret[1] = half_md4_transform(secret+8, secret); + secret[0] = half_md4_transform(secret+8, secret); + return *(unsigned long *)secret; +} +#endif diff -Naur linux-2.6.19.2.orig/drivers/char/vt_ioctl.c linux-2.6.19.2/drivers/char/vt_ioctl.c --- linux-2.6.19.2.orig/drivers/char/vt_ioctl.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/char/vt_ioctl.c 2007-01-12 23:27:23.000000000 +0000 @@ -95,6 +95,12 @@ case KDSKBENT: if (!perm) return -EPERM; + +#ifdef CONFIG_GRKERNSEC + if (!capable(CAP_SYS_TTY_CONFIG)) + return -EPERM; +#endif + if (!i && v == K_NOSUCHMAP) { /* deallocate map */ key_map = key_maps[s]; @@ -235,6 +241,13 @@ goto reterr; } +#ifdef CONFIG_GRKERNSEC + if (!capable(CAP_SYS_TTY_CONFIG)) { + ret = -EPERM; + goto reterr; + } +#endif + q = func_table[i]; first_free = funcbufptr + (funcbufsize - funcbufleft); for (j = i+1; j < MAX_NR_FUNC && !func_table[j]; j++) diff -Naur linux-2.6.19.2.orig/drivers/edac/edac_mc.h linux-2.6.19.2/drivers/edac/edac_mc.h --- linux-2.6.19.2.orig/drivers/edac/edac_mc.h 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/edac/edac_mc.h 2007-01-12 23:27:23.000000000 +0000 @@ -71,11 +71,11 @@ #else /* !CONFIG_EDAC_DEBUG */ -#define debugf0( ... ) -#define debugf1( ... ) -#define debugf2( ... ) -#define debugf3( ... ) -#define debugf4( ... ) +#define debugf0( ... ) do {} while (0) +#define debugf1( ... ) do {} while (0) +#define debugf2( ... ) do {} while (0) +#define debugf3( ... ) do {} while (0) +#define debugf4( ... ) do {} while (0) #endif /* !CONFIG_EDAC_DEBUG */ diff -Naur linux-2.6.19.2.orig/drivers/hwmon/fscpos.c linux-2.6.19.2/drivers/hwmon/fscpos.c --- linux-2.6.19.2.orig/drivers/hwmon/fscpos.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/hwmon/fscpos.c 2007-01-12 23:27:23.000000000 +0000 @@ -231,7 +231,6 @@ unsigned long v = simple_strtoul(buf, NULL, 10); /* Range: 0..255 */ - if (v < 0) v = 0; if (v > 255) v = 255; mutex_lock(&data->update_lock); diff -Naur linux-2.6.19.2.orig/drivers/hwmon/w83791d.c linux-2.6.19.2/drivers/hwmon/w83791d.c --- linux-2.6.19.2.orig/drivers/hwmon/w83791d.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/hwmon/w83791d.c 2007-01-12 23:27:23.000000000 +0000 @@ -289,8 +289,8 @@ static int w83791d_detect(struct i2c_adapter *adapter, int address, int kind); static int w83791d_detach_client(struct i2c_client *client); -static int w83791d_read(struct i2c_client *client, u8 register); -static int w83791d_write(struct i2c_client *client, u8 register, u8 value); +static int w83791d_read(struct i2c_client *client, u8 reg); +static int w83791d_write(struct i2c_client *client, u8 reg, u8 value); static struct w83791d_data *w83791d_update_device(struct device *dev); #ifdef DEBUG diff -Naur linux-2.6.19.2.orig/drivers/ide/ide-cd.c linux-2.6.19.2/drivers/ide/ide-cd.c --- linux-2.6.19.2.orig/drivers/ide/ide-cd.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/ide/ide-cd.c 2007-01-12 23:27:23.000000000 +0000 @@ -457,8 +457,6 @@ sector &= ~(bio_sectors -1); valid = (sector - failed_command->sector) << 9; - if (valid < 0) - valid = 0; if (sector < get_capacity(info->disk) && drive->probed_capacity - sector < 4 * 75) { set_capacity(info->disk, sector); diff -Naur linux-2.6.19.2.orig/drivers/ieee1394/dv1394.c linux-2.6.19.2/drivers/ieee1394/dv1394.c --- linux-2.6.19.2.orig/drivers/ieee1394/dv1394.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/ieee1394/dv1394.c 2007-01-12 23:27:23.000000000 +0000 @@ -919,7 +919,7 @@ /* default SYT offset is 3 cycles */ init->syt_offset = 3; - if ( (init->channel > 63) || (init->channel < 0) ) + if (init->channel > 63) init->channel = 63; chan_mask = (u64)1 << init->channel; diff -Naur linux-2.6.19.2.orig/drivers/ieee1394/hosts.c linux-2.6.19.2/drivers/ieee1394/hosts.c --- linux-2.6.19.2.orig/drivers/ieee1394/hosts.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/ieee1394/hosts.c 2007-01-12 23:27:23.000000000 +0000 @@ -75,6 +75,7 @@ } static struct hpsb_host_driver dummy_driver = { + .name = "dummy", .transmit_packet = dummy_transmit_packet, .devctl = dummy_devctl, .isoctl = dummy_isoctl diff -Naur linux-2.6.19.2.orig/drivers/ieee1394/ohci1394.c linux-2.6.19.2/drivers/ieee1394/ohci1394.c --- linux-2.6.19.2.orig/drivers/ieee1394/ohci1394.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/ieee1394/ohci1394.c 2007-01-12 23:27:23.000000000 +0000 @@ -161,9 +161,9 @@ printk(level "%s: fw-host%d: " fmt "\n" , OHCI1394_DRIVER_NAME, ohci->host->id , ## args) /* Module Parameters */ -static int phys_dma = 1; +static int phys_dma = 0; module_param(phys_dma, int, 0444); -MODULE_PARM_DESC(phys_dma, "Enable physical dma (default = 1)."); +MODULE_PARM_DESC(phys_dma, "Enable physical dma (default = 0)."); static void dma_trm_tasklet(unsigned long data); static void dma_trm_reset(struct dma_trm_ctx *d); diff -Naur linux-2.6.19.2.orig/drivers/ieee1394/sbp2.c linux-2.6.19.2/drivers/ieee1394/sbp2.c --- linux-2.6.19.2.orig/drivers/ieee1394/sbp2.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/ieee1394/sbp2.c 2007-01-12 23:27:23.000000000 +0000 @@ -2664,7 +2664,7 @@ .sdev_attrs = sbp2_sysfs_sdev_attrs, }; -static int sbp2_module_init(void) +static int __init sbp2_module_init(void) { int ret; diff -Naur linux-2.6.19.2.orig/drivers/ieee1394/video1394.c linux-2.6.19.2/drivers/ieee1394/video1394.c --- linux-2.6.19.2.orig/drivers/ieee1394/video1394.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/ieee1394/video1394.c 2007-01-12 23:27:23.000000000 +0000 @@ -890,7 +890,7 @@ d = find_ctx(&ctx->context_list, OHCI_ISO_RECEIVE, v.channel); if (d == NULL) return -EFAULT; - if ((v.buffer<0) || (v.buffer>=d->num_desc - 1)) { + if (v.buffer>=d->num_desc - 1) { PRINT(KERN_ERR, ohci->host->id, "Buffer %d out of range",v.buffer); return -EINVAL; @@ -955,7 +955,7 @@ d = find_ctx(&ctx->context_list, OHCI_ISO_RECEIVE, v.channel); if (d == NULL) return -EFAULT; - if ((v.buffer<0) || (v.buffer>d->num_desc - 1)) { + if (v.buffer>d->num_desc - 1) { PRINT(KERN_ERR, ohci->host->id, "Buffer %d out of range",v.buffer); return -EINVAL; @@ -1026,7 +1026,7 @@ d = find_ctx(&ctx->context_list, OHCI_ISO_TRANSMIT, v.channel); if (d == NULL) return -EFAULT; - if ((v.buffer<0) || (v.buffer>=d->num_desc - 1)) { + if (v.buffer>=d->num_desc - 1) { PRINT(KERN_ERR, ohci->host->id, "Buffer %d out of range",v.buffer); return -EINVAL; @@ -1128,7 +1128,7 @@ d = find_ctx(&ctx->context_list, OHCI_ISO_TRANSMIT, v.channel); if (d == NULL) return -EFAULT; - if ((v.buffer<0) || (v.buffer>=d->num_desc-1)) { + if (v.buffer>=d->num_desc-1) { PRINT(KERN_ERR, ohci->host->id, "Buffer %d out of range",v.buffer); return -EINVAL; diff -Naur linux-2.6.19.2.orig/drivers/md/bitmap.c linux-2.6.19.2/drivers/md/bitmap.c --- linux-2.6.19.2.orig/drivers/md/bitmap.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/md/bitmap.c 2007-01-12 23:27:23.000000000 +0000 @@ -57,7 +57,7 @@ # if DEBUG > 0 # define PRINTK(x...) printk(KERN_DEBUG x) # else -# define PRINTK(x...) +# define PRINTK(x...) do {} while (0) # endif #endif diff -Naur linux-2.6.19.2.orig/drivers/mtd/devices/doc2001.c linux-2.6.19.2/drivers/mtd/devices/doc2001.c --- linux-2.6.19.2.orig/drivers/mtd/devices/doc2001.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/mtd/devices/doc2001.c 2007-01-12 23:27:23.000000000 +0000 @@ -401,6 +401,8 @@ /* Don't allow read past end of device */ if (from >= this->totlen) return -EINVAL; + if (!len) + return -EINVAL; /* Don't allow a single read to cross a 512-byte block boundary */ if (from + len > ((from | 0x1ff) + 1)) diff -Naur linux-2.6.19.2.orig/drivers/net/eepro100.c linux-2.6.19.2/drivers/net/eepro100.c --- linux-2.6.19.2.orig/drivers/net/eepro100.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/net/eepro100.c 2007-01-12 23:27:23.000000000 +0000 @@ -47,7 +47,7 @@ # define rx_align(skb) skb_reserve((skb), 2) # define RxFD_ALIGNMENT __attribute__ ((aligned (2), packed)) #else -# define rx_align(skb) +# define rx_align(skb) do {} while (0) # define RxFD_ALIGNMENT #endif diff -Naur linux-2.6.19.2.orig/drivers/net/pcnet32.c linux-2.6.19.2/drivers/net/pcnet32.c --- linux-2.6.19.2.orig/drivers/net/pcnet32.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/net/pcnet32.c 2007-01-12 23:27:23.000000000 +0000 @@ -82,7 +82,7 @@ /* * VLB I/O addresses */ -static unsigned int pcnet32_portlist[] __initdata = +static unsigned int pcnet32_portlist[] __devinitdata = { 0x300, 0x320, 0x340, 0x360, 0 }; static int pcnet32_debug = 0; diff -Naur linux-2.6.19.2.orig/drivers/pci/proc.c linux-2.6.19.2/drivers/pci/proc.c --- linux-2.6.19.2.orig/drivers/pci/proc.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/pci/proc.c 2007-01-12 23:27:23.000000000 +0000 @@ -467,7 +467,15 @@ { struct proc_dir_entry *entry; struct pci_dev *dev = NULL; +#ifdef CONFIG_GRKERNSEC_PROC_ADD +#ifdef CONFIG_GRKERNSEC_PROC_USER + proc_bus_pci_dir = proc_mkdir_mode("pci", S_IRUSR | S_IXUSR, proc_bus); +#elif CONFIG_GRKERNSEC_PROC_USERGROUP + proc_bus_pci_dir = proc_mkdir_mode("pci", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, proc_bus); +#endif +#else proc_bus_pci_dir = proc_mkdir("pci", proc_bus); +#endif entry = create_proc_entry("devices", 0, proc_bus_pci_dir); if (entry) entry->proc_fops = &proc_bus_pci_dev_operations; diff -Naur linux-2.6.19.2.orig/drivers/pnp/pnpbios/bioscalls.c linux-2.6.19.2/drivers/pnp/pnpbios/bioscalls.c --- linux-2.6.19.2.orig/drivers/pnp/pnpbios/bioscalls.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/pnp/pnpbios/bioscalls.c 2007-01-12 23:27:23.000000000 +0000 @@ -65,7 +65,7 @@ set_limit(gdt[(selname) >> 3], size); \ } while(0) -static struct desc_struct bad_bios_desc = { 0, 0x00409200 }; +static struct desc_struct bad_bios_desc = { 0, 0x00409300 }; /* * At some point we want to use this stack frame pointer to unwind @@ -93,6 +93,10 @@ struct desc_struct save_desc_40; int cpu; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + /* * PnP BIOSes are generally not terribly re-entrant. * Also, don't rely on them to save everything correctly. @@ -107,6 +111,10 @@ /* On some boxes IRQ's during PnP BIOS calls are deadly. */ spin_lock_irqsave(&pnp_bios_lock, flags); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + /* The lock prevents us bouncing CPU here */ if (ts1_size) Q2_SET_SEL(smp_processor_id(), PNP_TS1, ts1_base, ts1_size); @@ -142,9 +150,14 @@ "i" (0) : "memory" ); - spin_unlock_irqrestore(&pnp_bios_lock, flags); get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + + spin_unlock_irqrestore(&pnp_bios_lock, flags); put_cpu(); /* If we get here and this is set then the PnP BIOS faulted on us. */ diff -Naur linux-2.6.19.2.orig/drivers/pnp/resource.c linux-2.6.19.2/drivers/pnp/resource.c --- linux-2.6.19.2.orig/drivers/pnp/resource.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/pnp/resource.c 2007-01-12 23:27:23.000000000 +0000 @@ -364,7 +364,7 @@ return 1; /* check if the resource is valid */ - if (*irq < 0 || *irq > 15) + if (*irq > 15) return 0; /* check if the resource is reserved */ @@ -430,7 +430,7 @@ return 1; /* check if the resource is valid */ - if (*dma < 0 || *dma == 4 || *dma > 7) + if (*dma == 4 || *dma > 7) return 0; /* check if the resource is reserved */ diff -Naur linux-2.6.19.2.orig/drivers/scsi/scsi_logging.h linux-2.6.19.2/drivers/scsi/scsi_logging.h --- linux-2.6.19.2.orig/drivers/scsi/scsi_logging.h 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/scsi/scsi_logging.h 2007-01-12 23:27:23.000000000 +0000 @@ -51,7 +51,7 @@ } while (0); \ } while (0) #else -#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) +#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) do {} while (0) #endif /* CONFIG_SCSI_LOGGING */ /* diff -Naur linux-2.6.19.2.orig/drivers/usb/storage/debug.h linux-2.6.19.2/drivers/usb/storage/debug.h --- linux-2.6.19.2.orig/drivers/usb/storage/debug.h 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/usb/storage/debug.h 2007-01-12 23:27:23.000000000 +0000 @@ -56,9 +56,9 @@ #define US_DEBUGPX(x...) printk( x ) #define US_DEBUG(x) x #else -#define US_DEBUGP(x...) -#define US_DEBUGPX(x...) -#define US_DEBUG(x) +#define US_DEBUGP(x...) do {} while (0) +#define US_DEBUGPX(x...) do {} while (0) +#define US_DEBUG(x) do {} while (0) #endif #endif diff -Naur linux-2.6.19.2.orig/drivers/video/fbcmap.c linux-2.6.19.2/drivers/video/fbcmap.c --- linux-2.6.19.2.orig/drivers/video/fbcmap.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/video/fbcmap.c 2007-01-12 23:27:23.000000000 +0000 @@ -250,8 +250,7 @@ int rc, size = cmap->len * sizeof(u16); struct fb_cmap umap; - if (cmap->start < 0 || (!info->fbops->fb_setcolreg && - !info->fbops->fb_setcmap)) + if (!info->fbops->fb_setcolreg && !info->fbops->fb_setcmap) return -EINVAL; memset(&umap, 0, sizeof(struct fb_cmap)); diff -Naur linux-2.6.19.2.orig/drivers/video/fbmem.c linux-2.6.19.2/drivers/video/fbmem.c --- linux-2.6.19.2.orig/drivers/video/fbmem.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/video/fbmem.c 2007-01-12 23:27:23.000000000 +0000 @@ -936,9 +936,9 @@ case FBIOPUT_CON2FBMAP: if (copy_from_user(&con2fb, argp, sizeof(con2fb))) return - EFAULT; - if (con2fb.console < 0 || con2fb.console > MAX_NR_CONSOLES) + if (con2fb.console > MAX_NR_CONSOLES) return -EINVAL; - if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) + if (con2fb.framebuffer >= FB_MAX) return -EINVAL; #ifdef CONFIG_KMOD if (!registered_fb[con2fb.framebuffer]) diff -Naur linux-2.6.19.2.orig/drivers/video/fbmon.c linux-2.6.19.2/drivers/video/fbmon.c --- linux-2.6.19.2.orig/drivers/video/fbmon.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/video/fbmon.c 2007-01-12 23:27:23.000000000 +0000 @@ -45,7 +45,7 @@ #ifdef DEBUG #define DPRINTK(fmt, args...) printk(fmt,## args) #else -#define DPRINTK(fmt, args...) +#define DPRINTK(fmt, args...) do {} while (0) #endif #define FBMON_FIX_HEADER 1 diff -Naur linux-2.6.19.2.orig/drivers/video/i810/i810_accel.c linux-2.6.19.2/drivers/video/i810/i810_accel.c --- linux-2.6.19.2.orig/drivers/video/i810/i810_accel.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/video/i810/i810_accel.c 2007-01-12 23:27:23.000000000 +0000 @@ -73,6 +73,7 @@ } } printk("ringbuffer lockup!!!\n"); + printk("head:%u tail:%u iring.size:%u space:%u\n", head, tail, par->iring.size, space); i810_report_error(mmio); par->dev_flags |= LOCKUP; info->pixmap.scan_align = 1; diff -Naur linux-2.6.19.2.orig/drivers/video/i810/i810_main.c linux-2.6.19.2/drivers/video/i810/i810_main.c --- linux-2.6.19.2.orig/drivers/video/i810/i810_main.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/video/i810/i810_main.c 2007-01-12 23:27:23.000000000 +0000 @@ -1506,7 +1506,7 @@ int size = ((cursor->image.width + 7) >> 3) * cursor->image.height; int i; - u8 *data = kmalloc(64 * 8, GFP_ATOMIC); + u8 *data = kmalloc(64 * 8, GFP_KERNEL); if (data == NULL) return -ENOMEM; diff -Naur linux-2.6.19.2.orig/drivers/video/vesafb.c linux-2.6.19.2/drivers/video/vesafb.c --- linux-2.6.19.2.orig/drivers/video/vesafb.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/drivers/video/vesafb.c 2007-01-12 23:27:23.000000000 +0000 @@ -267,7 +267,7 @@ size_remap = size_total; vesafb_fix.smem_len = size_remap; -#ifndef __i386__ +#if !defined(__i386__) || defined(CONFIG_PAX_KERNEXEC) screen_info.vesapm_seg = 0; #endif diff -Naur linux-2.6.19.2.orig/fs/Kconfig linux-2.6.19.2/fs/Kconfig --- linux-2.6.19.2.orig/fs/Kconfig 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/Kconfig 2007-01-12 23:27:23.000000000 +0000 @@ -929,7 +929,7 @@ config PROC_KCORE bool "/proc/kcore support" if !ARM - depends on PROC_FS && MMU + depends on PROC_FS && MMU && !GRKERNSEC_PROC_ADD config PROC_VMCORE bool "/proc/vmcore support (EXPERIMENTAL)" diff -Naur linux-2.6.19.2.orig/fs/binfmt_aout.c linux-2.6.19.2/fs/binfmt_aout.c --- linux-2.6.19.2.orig/fs/binfmt_aout.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/binfmt_aout.c 2007-01-12 23:27:23.000000000 +0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -123,10 +124,12 @@ /* If the size of the dump file exceeds the rlimit, then see what would happen if we wrote the stack, but not the data area. */ #ifdef __sparc__ + gr_learn_resource(current, RLIMIT_CORE, dump.u_dsize+dump.u_ssize, 1); if ((dump.u_dsize+dump.u_ssize) > current->signal->rlim[RLIMIT_CORE].rlim_cur) dump.u_dsize = 0; #else + gr_learn_resource(current, RLIMIT_CORE, (dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE, 1); if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE > current->signal->rlim[RLIMIT_CORE].rlim_cur) dump.u_dsize = 0; @@ -134,10 +137,12 @@ /* Make sure we have enough room to write the stack and data areas. */ #ifdef __sparc__ + gr_learn_resource(current, RLIMIT_CORE, dump.u_ssize, 1); if ((dump.u_ssize) > current->signal->rlim[RLIMIT_CORE].rlim_cur) dump.u_ssize = 0; #else + gr_learn_resource(current, RLIMIT_CORE, (dump.u_ssize+1) * PAGE_SIZE, 1); if ((dump.u_ssize+1) * PAGE_SIZE > current->signal->rlim[RLIMIT_CORE].rlim_cur) dump.u_ssize = 0; @@ -294,6 +299,8 @@ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; if (rlim >= RLIM_INFINITY) rlim = ~0; + + gr_learn_resource(current, RLIMIT_DATA, ex.a_data + ex.a_bss, 1); if (ex.a_data + ex.a_bss > rlim) return -ENOMEM; @@ -326,6 +333,28 @@ current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; + +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) + current->mm->pax_flags = 0UL; +#endif + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(N_FLAGS(ex) & F_PAX_PAGEEXEC)) { + current->mm->pax_flags |= MF_PAX_PAGEEXEC; + +#ifdef CONFIG_PAX_EMUTRAMP + if (N_FLAGS(ex) & F_PAX_EMUTRAMP) + current->mm->pax_flags |= MF_PAX_EMUTRAMP; +#endif + +#ifdef CONFIG_PAX_MPROTECT + if (!(N_FLAGS(ex) & F_PAX_MPROTECT)) + current->mm->pax_flags |= MF_PAX_MPROTECT; +#endif + + } +#endif + #ifdef __sparc__ if (N_MAGIC(ex) == NMAGIC) { loff_t pos = fd_offset; @@ -421,7 +450,7 @@ down_write(¤t->mm->mmap_sem); error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, - PROT_READ | PROT_WRITE | PROT_EXEC, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); up_write(¤t->mm->mmap_sem); diff -Naur linux-2.6.19.2.orig/fs/binfmt_elf.c linux-2.6.19.2/fs/binfmt_elf.c --- linux-2.6.19.2.orig/fs/binfmt_elf.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/binfmt_elf.c 2007-01-12 23:27:23.000000000 +0000 @@ -39,10 +39,16 @@ #include #include #include +#include + #include #include #include +#ifdef CONFIG_PAX_SEGMEXEC +#include +#endif + static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); static int load_elf_library(struct file *); static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); @@ -87,6 +93,8 @@ static int set_brk(unsigned long start, unsigned long end) { + unsigned long e = end; + start = ELF_PAGEALIGN(start); end = ELF_PAGEALIGN(end); if (end > start) { @@ -97,7 +105,7 @@ if (BAD_ADDR(addr)) return addr; } - current->mm->start_brk = current->mm->brk = end; + current->mm->start_brk = current->mm->brk = e; return 0; } @@ -315,10 +323,9 @@ { struct elf_phdr *elf_phdata; struct elf_phdr *eppnt; - unsigned long load_addr = 0; - int load_addr_set = 0; + unsigned long load_addr = 0, min_addr, max_addr, task_size = TASK_SIZE; unsigned long last_bss = 0, elf_bss = 0; - unsigned long error = ~0UL; + unsigned long error = -EINVAL; int retval, i, size; /* First of all, some simple consistency checks */ @@ -357,66 +364,86 @@ goto out_close; } +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + eppnt = elf_phdata; + min_addr = task_size; + max_addr = 0; + error = -ENOMEM; + for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { - if (eppnt->p_type == PT_LOAD) { - int elf_type = MAP_PRIVATE | MAP_DENYWRITE; - int elf_prot = 0; - unsigned long vaddr = 0; - unsigned long k, map_addr; - - if (eppnt->p_flags & PF_R) - elf_prot = PROT_READ; - if (eppnt->p_flags & PF_W) - elf_prot |= PROT_WRITE; - if (eppnt->p_flags & PF_X) - elf_prot |= PROT_EXEC; - vaddr = eppnt->p_vaddr; - if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) - elf_type |= MAP_FIXED; - - map_addr = elf_map(interpreter, load_addr + vaddr, - eppnt, elf_prot, elf_type); - error = map_addr; - if (BAD_ADDR(map_addr)) - goto out_close; - - if (!load_addr_set && - interp_elf_ex->e_type == ET_DYN) { - load_addr = map_addr - ELF_PAGESTART(vaddr); - load_addr_set = 1; - } + if (eppnt->p_type != PT_LOAD) + continue; - /* - * Check to see if the section's size will overflow the - * allowed task size. Note that p_filesz must always be - * <= p_memsize so it's only necessary to check p_memsz. - */ - k = load_addr + eppnt->p_vaddr; - if (BAD_ADDR(k) || - eppnt->p_filesz > eppnt->p_memsz || - eppnt->p_memsz > TASK_SIZE || - TASK_SIZE - eppnt->p_memsz < k) { - error = -ENOMEM; - goto out_close; - } + /* + * Check to see if the section's size will overflow the + * allowed task size. Note that p_filesz must always be + * <= p_memsize so it is only necessary to check p_memsz. + */ + if (eppnt->p_filesz > eppnt->p_memsz || eppnt->p_vaddr >= eppnt->p_vaddr + eppnt->p_memsz) + goto out_close; - /* - * Find the end of the file mapping for this phdr, and - * keep track of the largest address we see for this. - */ - k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; - if (k > elf_bss) - elf_bss = k; + if (min_addr > ELF_PAGESTART(eppnt->p_vaddr)) + min_addr = ELF_PAGESTART(eppnt->p_vaddr); + if (max_addr < ELF_PAGEALIGN(eppnt->p_vaddr + eppnt->p_memsz)) + max_addr = ELF_PAGEALIGN(eppnt->p_vaddr + eppnt->p_memsz); + } + if (min_addr >= max_addr || max_addr > task_size) + goto out_close; - /* - * Do the same thing for the memory mapping - between - * elf_bss and last_bss is the bss section. - */ - k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; - if (k > last_bss) - last_bss = k; - } + if (interp_elf_ex->e_type == ET_DYN) { + load_addr = get_unmapped_area(interpreter, 0, max_addr - min_addr, 0, MAP_PRIVATE | MAP_EXECUTABLE); + + if (load_addr >= task_size) + goto out_close; + + load_addr -= min_addr; + } + + eppnt = elf_phdata; + for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { + int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_FIXED; + int elf_prot = 0; + unsigned long vaddr = 0; + unsigned long k, map_addr; + + if (eppnt->p_type != PT_LOAD) + continue; + + if (eppnt->p_flags & PF_R) + elf_prot = PROT_READ; + if (eppnt->p_flags & PF_W) + elf_prot |= PROT_WRITE; + if (eppnt->p_flags & PF_X) + elf_prot |= PROT_EXEC; + vaddr = eppnt->p_vaddr; + + map_addr = elf_map(interpreter, load_addr + vaddr, + eppnt, elf_prot, elf_type); + error = map_addr; + if (BAD_ADDR(map_addr)) + goto out_close; + + k = load_addr + eppnt->p_vaddr; + + /* + * Find the end of the file mapping for this phdr, and + * keep track of the largest address we see for this. + */ + k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; + if (k > elf_bss) + elf_bss = k; + + /* + * Do the same thing for the memory mapping - between + * elf_bss and last_bss is the bss section. + */ + k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; + if (k > last_bss) + last_bss = k; } /* @@ -444,6 +471,8 @@ *interp_load_addr = load_addr; error = ((unsigned long)interp_elf_ex->e_entry) + load_addr; + if (BAD_ADDR(error)) + error = -EFAULT; out_close: kfree(elf_phdata); @@ -454,7 +483,7 @@ static unsigned long load_aout_interp(struct exec *interp_ex, struct file *interpreter) { - unsigned long text_data, elf_entry = ~0UL; + unsigned long text_data, elf_entry = -EINVAL; char __user * addr; loff_t offset; @@ -497,6 +526,180 @@ return elf_entry; } +#if (defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS)) && defined(CONFIG_PAX_SOFTMODE) +static unsigned long pax_parse_softmode(const struct elf_phdr * const elf_phdata) +{ + unsigned long pax_flags = 0UL; + +#ifdef CONFIG_PAX_PAGEEXEC + if (elf_phdata->p_flags & PF_PAGEEXEC) + pax_flags |= MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if (elf_phdata->p_flags & PF_SEGMEXEC) + pax_flags |= MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_PAGEEXEC + if (pax_flags & MF_PAX_PAGEEXEC) + pax_flags &= ~MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_SEGMEXEC + if (pax_flags & MF_PAX_SEGMEXEC) + pax_flags &= ~MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + if (elf_phdata->p_flags & PF_EMUTRAMP) + pax_flags |= MF_PAX_EMUTRAMP; +#endif + +#ifdef CONFIG_PAX_MPROTECT + if (elf_phdata->p_flags & PF_MPROTECT) + pax_flags |= MF_PAX_MPROTECT; +#endif + +#if defined(CONFIG_PAX_RANDMMAP) || defined(CONFIG_PAX_RANDUSTACK) + if (randomize_va_space && (elf_phdata->p_flags & PF_RANDMMAP)) + pax_flags |= MF_PAX_RANDMMAP; +#endif + + return pax_flags; +} +#endif + +#ifdef CONFIG_PAX_PT_PAX_FLAGS +static unsigned long pax_parse_hardmode(const struct elf_phdr * const elf_phdata) +{ + unsigned long pax_flags = 0UL; + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(elf_phdata->p_flags & PF_NOPAGEEXEC)) + pax_flags |= MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if (!(elf_phdata->p_flags & PF_NOSEGMEXEC)) + pax_flags |= MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_PAGEEXEC + if (pax_flags & MF_PAX_PAGEEXEC) + pax_flags &= ~MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_SEGMEXEC + if (pax_flags & MF_PAX_SEGMEXEC) + pax_flags &= ~MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + if (!(elf_phdata->p_flags & PF_NOEMUTRAMP)) + pax_flags |= MF_PAX_EMUTRAMP; +#endif + +#ifdef CONFIG_PAX_MPROTECT + if (!(elf_phdata->p_flags & PF_NOMPROTECT)) + pax_flags |= MF_PAX_MPROTECT; +#endif + +#if defined(CONFIG_PAX_RANDMMAP) || defined(CONFIG_PAX_RANDUSTACK) + if (randomize_va_space && !(elf_phdata->p_flags & PF_NORANDMMAP)) + pax_flags |= MF_PAX_RANDMMAP; +#endif + + return pax_flags; +} +#endif + +#ifdef CONFIG_PAX_EI_PAX +static unsigned long pax_parse_ei_pax(const struct elfhdr * const elf_ex) +{ + unsigned long pax_flags = 0UL; + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(elf_ex->e_ident[EI_PAX] & EF_PAX_PAGEEXEC)) + pax_flags |= MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if (!(elf_ex->e_ident[EI_PAX] & EF_PAX_SEGMEXEC)) + pax_flags |= MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_PAGEEXEC + if (pax_flags & MF_PAX_PAGEEXEC) + pax_flags &= ~MF_PAX_SEGMEXEC; +#endif + +#ifdef CONFIG_PAX_DEFAULT_SEGMEXEC + if (pax_flags & MF_PAX_SEGMEXEC) + pax_flags &= ~MF_PAX_PAGEEXEC; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + if ((pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) && (elf_ex->e_ident[EI_PAX] & EF_PAX_EMUTRAMP)) + pax_flags |= MF_PAX_EMUTRAMP; +#endif + +#ifdef CONFIG_PAX_MPROTECT + if ((pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) && !(elf_ex->e_ident[EI_PAX] & EF_PAX_MPROTECT)) + pax_flags |= MF_PAX_MPROTECT; +#endif + +#ifdef CONFIG_PAX_ASLR + if (randomize_va_space && !(elf_ex->e_ident[EI_PAX] & EF_PAX_RANDMMAP)) + pax_flags |= MF_PAX_RANDMMAP; +#endif + + return pax_flags; +} +#endif + +#if defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS) +static long pax_parse_elf_flags(const struct elfhdr * const elf_ex, const struct elf_phdr * const elf_phdata) +{ + unsigned long pax_flags = 0UL; + +#ifdef CONFIG_PAX_PT_PAX_FLAGS + unsigned long i; +#endif + +#ifdef CONFIG_PAX_EI_PAX + pax_flags = pax_parse_ei_pax(elf_ex); +#endif + +#ifdef CONFIG_PAX_PT_PAX_FLAGS + for (i = 0UL; i < elf_ex->e_phnum; i++) + if (elf_phdata[i].p_type == PT_PAX_FLAGS) { + if (((elf_phdata[i].p_flags & PF_PAGEEXEC) && (elf_phdata[i].p_flags & PF_NOPAGEEXEC)) || + ((elf_phdata[i].p_flags & PF_SEGMEXEC) && (elf_phdata[i].p_flags & PF_NOSEGMEXEC)) || + ((elf_phdata[i].p_flags & PF_EMUTRAMP) && (elf_phdata[i].p_flags & PF_NOEMUTRAMP)) || + ((elf_phdata[i].p_flags & PF_MPROTECT) && (elf_phdata[i].p_flags & PF_NOMPROTECT)) || + ((elf_phdata[i].p_flags & PF_RANDMMAP) && (elf_phdata[i].p_flags & PF_NORANDMMAP))) + return -EINVAL; + +#ifdef CONFIG_PAX_SOFTMODE + if (pax_softmode) + pax_flags = pax_parse_softmode(&elf_phdata[i]); + else +#endif + + pax_flags = pax_parse_hardmode(&elf_phdata[i]); + break; + } +#endif + + if (0 > pax_check_flags(&pax_flags)) + return -EINVAL; + + current->mm->pax_flags = pax_flags; + return 0; +} +#endif + /* * These are the functions used to load ELF style executables and shared * libraries. There is no binary dependent code anywhere else. @@ -534,7 +737,7 @@ char * elf_interpreter = NULL; unsigned int interpreter_type = INTERPRETER_NONE; unsigned char ibcs2_interpreter = 0; - unsigned long error; + unsigned long error = 0; struct elf_phdr *elf_ppnt, *elf_phdata; unsigned long elf_bss, elf_brk; int elf_exec_fileno; @@ -552,6 +755,7 @@ struct elfhdr interp_elf_ex; struct exec interp_ex; } *loc; + unsigned long task_size = TASK_SIZE; loc = kmalloc(sizeof(*loc), GFP_KERNEL); if (!loc) { @@ -774,14 +978,88 @@ current->mm->end_code = 0; current->mm->mmap = NULL; current->flags &= ~PF_FORKNOEXEC; + +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) + current->mm->pax_flags = 0UL; +#endif + +#ifdef CONFIG_PAX_DLRESOLVE + current->mm->call_dl_resolve = 0UL; +#endif + +#if defined(CONFIG_PPC32) && defined(CONFIG_PAX_EMUSIGRT) + current->mm->call_syscall = 0UL; +#endif + +#ifdef CONFIG_PAX_ASLR + current->mm->delta_mmap = 0UL; + current->mm->delta_exec = 0UL; + current->mm->delta_stack = 0UL; +#endif + current->mm->def_flags = def_flags; +#if defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS) + if (0 > pax_parse_elf_flags(&loc->elf_ex, elf_phdata)) { + send_sig(SIGKILL, current, 0); + goto out_free_dentry; + } +#endif + +#ifdef CONFIG_PAX_HAVE_ACL_FLAGS + pax_set_initial_flags(bprm); +#elif defined(CONFIG_PAX_HOOK_ACL_FLAGS) + if (pax_set_initial_flags_func) + (pax_set_initial_flags_func)(bprm); +#endif + +#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT + if (current->mm->pax_flags & MF_PAX_PAGEEXEC) + current->mm->context.user_cs_limit = PAGE_SIZE; +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) { + int cpu = get_cpu(); + + current->mm->context.user_cs_base = SEGMEXEC_TASK_SIZE; + current->mm->context.user_cs_limit = -SEGMEXEC_TASK_SIZE; + set_user_cs(current->mm, cpu); + put_cpu(); + task_size = SEGMEXEC_TASK_SIZE; + } +#endif + +#ifdef CONFIG_PAX_ASLR + if (current->mm->pax_flags & MF_PAX_RANDMMAP) { +#define pax_delta_mask(delta, lsb, len) (((delta) & ((1UL << (len)) - 1)) << (lsb)) + + current->mm->delta_mmap = pax_delta_mask(pax_get_random_long(), PAX_DELTA_MMAP_LSB(current), PAX_DELTA_MMAP_LEN(current)); + current->mm->delta_exec = pax_delta_mask(pax_get_random_long(), PAX_DELTA_EXEC_LSB(current), PAX_DELTA_EXEC_LEN(current)); + current->mm->delta_stack = pax_delta_mask(pax_get_random_long(), PAX_DELTA_STACK_LSB(current), PAX_DELTA_STACK_LEN(current)); + } +#endif + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (current->mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) + executable_stack = EXSTACK_DEFAULT; +#endif + /* Do this immediately, since STACK_TOP as used in setup_arg_pages may depend on the personality. */ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (!(current->mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC))) +#endif + if (elf_read_implies_exec(loc->elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; +#ifdef CONFIG_PAX_ASLR + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; arch_pick_mmap_layout(current->mm); @@ -857,6 +1135,15 @@ * might try to exec. This is because the brk will * follow the loader, and is not movable. */ load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); + +#ifdef CONFIG_PAX_RANDMMAP + /* PaX: randomize base address at the default exe base if requested */ + if (current->mm->pax_flags & MF_PAX_RANDMMAP) { + load_bias = ELF_PAGESTART(PAX_ELF_ET_DYN_BASE(current) - vaddr + current->mm->delta_exec); + elf_flags |= MAP_FIXED; + } +#endif + } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, @@ -887,9 +1174,9 @@ * allowed task size. Note that p_filesz must always be * <= p_memsz so it is only necessary to check p_memsz. */ - if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz || - elf_ppnt->p_memsz > TASK_SIZE || - TASK_SIZE - elf_ppnt->p_memsz < k) { + if (k >= task_size || elf_ppnt->p_filesz > elf_ppnt->p_memsz || + elf_ppnt->p_memsz > task_size || + task_size - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. */ send_sig(SIGKILL, current, 0); goto out_free_dentry; @@ -916,6 +1203,12 @@ start_data += load_bias; end_data += load_bias; +#ifdef CONFIG_PAX_RANDMMAP + if (current->mm->pax_flags & MF_PAX_RANDMMAP) + elf_brk += PAGE_SIZE + pax_delta_mask(pax_get_random_long(), 4, PAGE_SHIFT); +#undef pax_delta_mask +#endif + /* Calling set_brk effectively mmaps the pages that we need * for the bss and break sections. We must do this before * mapping in the interpreter, to make sure it doesn't wind @@ -1178,7 +1471,7 @@ * * I think we should skip something. But I am not sure how. H.J. */ -static int maydump(struct vm_area_struct *vma) +static int maydump(struct vm_area_struct *vma, long signr) { /* Do not dump I/O mapped devices or special mappings */ if (vma->vm_flags & (VM_IO | VM_RESERVED)) @@ -1189,7 +1482,7 @@ return vma->vm_file->f_dentry->d_inode->i_nlink == 0; /* If it hasn't been written to, don't write it out */ - if (!vma->anon_vma) + if (signr != SIGKILL && !vma->anon_vma) return 0; return 1; @@ -1246,8 +1539,11 @@ #undef DUMP_WRITE #define DUMP_WRITE(addr, nr) \ + do { \ + gr_learn_resource(current, RLIMIT_CORE, size + (nr), 1); \ if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ - goto end_coredump; + goto end_coredump; \ + } while (0); #define DUMP_SEEK(off) \ if (!dump_seek(file, (off))) \ goto end_coredump; @@ -1600,7 +1896,7 @@ phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; - phdr.p_filesz = maydump(vma) ? sz : 0; + phdr.p_filesz = maydump(vma, signr) ? sz : 0; phdr.p_memsz = sz; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; @@ -1638,7 +1934,7 @@ for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { unsigned long addr; - if (!maydump(vma)) + if (!maydump(vma, signr)) continue; for (addr = vma->vm_start; @@ -1658,6 +1954,7 @@ flush_cache_page(vma, addr, page_to_pfn(page)); kaddr = kmap(page); + gr_learn_resource(current, RLIMIT_CORE, size + PAGE_SIZE, 1); if ((size += PAGE_SIZE) > limit || !dump_write(file, kaddr, PAGE_SIZE)) { diff -Naur linux-2.6.19.2.orig/fs/binfmt_flat.c linux-2.6.19.2/fs/binfmt_flat.c --- linux-2.6.19.2.orig/fs/binfmt_flat.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/binfmt_flat.c 2007-01-12 23:27:23.000000000 +0000 @@ -551,7 +551,9 @@ realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", (int)-datapos); + down_write(¤t->mm->mmap_sem); do_munmap(current->mm, textpos, text_len); + up_write(¤t->mm->mmap_sem); ret = realdatastart; goto err; } @@ -573,8 +575,10 @@ } if (result >= (unsigned long)-4096) { printk("Unable to read data+bss, errno %d\n", (int)-result); + down_write(¤t->mm->mmap_sem); do_munmap(current->mm, textpos, text_len); do_munmap(current->mm, realdatastart, data_len + extra); + up_write(¤t->mm->mmap_sem); ret = result; goto err; } @@ -638,8 +642,10 @@ } if (result >= (unsigned long)-4096) { printk("Unable to read code+data+bss, errno %d\n",(int)-result); + down_write(¤t->mm->mmap_sem); do_munmap(current->mm, textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); + up_write(¤t->mm->mmap_sem); ret = result; goto err; } diff -Naur linux-2.6.19.2.orig/fs/binfmt_misc.c linux-2.6.19.2/fs/binfmt_misc.c --- linux-2.6.19.2.orig/fs/binfmt_misc.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/binfmt_misc.c 2007-01-12 23:27:23.000000000 +0000 @@ -113,9 +113,11 @@ struct files_struct *files = NULL; retval = -ENOEXEC; - if (!enabled) + if (!enabled || bprm->misc) goto _ret; + bprm->misc++; + /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); diff -Naur linux-2.6.19.2.orig/fs/buffer.c linux-2.6.19.2/fs/buffer.c --- linux-2.6.19.2.orig/fs/buffer.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/buffer.c 2007-01-12 23:27:23.000000000 +0000 @@ -41,6 +41,7 @@ #include #include #include +#include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static void invalidate_bh_lrus(void); @@ -2018,6 +2019,7 @@ err = -EFBIG; limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + gr_learn_resource(current, RLIMIT_FSIZE, (unsigned long) size, 1); if (limit != RLIM_INFINITY && size > (loff_t)limit) { send_sig(SIGXFSZ, current, 0); goto out; diff -Naur linux-2.6.19.2.orig/fs/cifs/cifssmb.c linux-2.6.19.2/fs/cifs/cifssmb.c --- linux-2.6.19.2.orig/fs/cifs/cifssmb.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/cifs/cifssmb.c 2007-01-12 23:27:23.000000000 +0000 @@ -2812,10 +2812,10 @@ /* security id for everyone */ -const static struct cifs_sid sid_everyone = +static const struct cifs_sid sid_everyone = {1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}}; /* group users */ -const static struct cifs_sid sid_user = +static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}}; /* Convert CIFS ACL to POSIX form */ diff -Naur linux-2.6.19.2.orig/fs/cifs/smbdes.c linux-2.6.19.2/fs/cifs/smbdes.c --- linux-2.6.19.2.orig/fs/cifs/smbdes.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/cifs/smbdes.c 2007-01-12 23:27:23.000000000 +0000 @@ -196,15 +196,18 @@ char c[28]; char d[28]; char *cd; - char ki[16][48]; + char *ki; char *pd1; char l[32], r[32]; char *rl; + char *er; /* er[48] */ /* Have to reduce stack usage */ - pk1 = kmalloc(56+56+64+64,GFP_KERNEL); - if(pk1 == NULL) - return; + pk1 = kmalloc(56+56+64+64, GFP_KERNEL); + ki = kmalloc(16*48, GFP_KERNEL); + er = kmalloc(48+48+32+32+32, GFP_KERNEL); + if (!pk1 || !ki || !er) + goto free; cd = pk1 + 56; pd1= cd + 56; @@ -222,7 +225,7 @@ lshift(d, sc[i], 28); concat(cd, c, d, 28, 28); - permute(ki[i], cd, perm2, 48); + permute(ki+48*i, cd, perm2, 48); } permute(pd1, in, perm3, 64); @@ -233,18 +236,12 @@ } for (i = 0; i < 16; i++) { - char *er; /* er[48] */ char *erk; /* erk[48] */ char b[8][6]; char *cb; /* cb[32] */ char *pcb; /* pcb[32] */ char *r2; /* r2[32] */ - er = kmalloc(48+48+32+32+32, GFP_KERNEL); - if(er == NULL) { - kfree(pk1); - return; - } erk = er+48; cb = erk+48; pcb = cb+32; @@ -252,7 +249,7 @@ permute(er, r, perm4, 48); - xor(erk, er, ki[forw ? i : 15 - i], 48); + xor(erk, er, ki+48*(forw ? i : 15 - i), 48); for (j = 0; j < 8; j++) for (k = 0; k < 6; k++) @@ -282,13 +279,15 @@ for (j = 0; j < 32; j++) r[j] = r2[j]; - - kfree(er); } concat(rl, r, l, 32, 32); permute(out, rl, perm6, 64); + +free: + kfree(er); + kfree(ki); kfree(pk1); } diff -Naur linux-2.6.19.2.orig/fs/compat.c linux-2.6.19.2/fs/compat.c --- linux-2.6.19.2.orig/fs/compat.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/compat.c 2007-01-12 23:27:23.000000000 +0000 @@ -46,6 +46,7 @@ #include #include #include +#include #include /* siocdevprivate_ioctl */ @@ -1492,6 +1493,11 @@ struct file *file; int retval; int i; +#ifdef CONFIG_GRKERNSEC + struct file *old_exec_file; + struct acl_subject_label *old_acl; + struct rlimit old_rlim[RLIM_NLIMITS]; +#endif retval = -ENOMEM; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); @@ -1509,6 +1515,15 @@ bprm->file = file; bprm->filename = filename; bprm->interp = filename; + + gr_learn_resource(current, RLIMIT_NPROC, atomic_read(¤t->user->processes), 1); + retval = -EAGAIN; + if (gr_handle_nproc()) + goto out_file; + retval = -EACCES; + if (!gr_acl_handle_execve(file->f_dentry, file->f_vfsmnt)) + goto out_file; + bprm->mm = mm_alloc(); retval = -ENOMEM; if (!bprm->mm) @@ -1547,10 +1562,39 @@ if (retval < 0) goto out; + if (!gr_tpe_allow(file)) { + retval = -EACCES; + goto out; + } + + if (gr_check_crash_exec(file)) { + retval = -EACCES; + goto out; + } + + gr_log_chroot_exec(file->f_dentry, file->f_vfsmnt); + + gr_handle_exec_args(bprm, (char __user * __user *)argv); + +#ifdef CONFIG_GRKERNSEC + old_acl = current->acl; + memcpy(old_rlim, current->signal->rlim, sizeof(old_rlim)); + old_exec_file = current->exec_file; + get_file(file); + current->exec_file = file; +#endif + + gr_set_proc_label(file->f_dentry, file->f_vfsmnt); + retval = search_binary_handler(bprm, regs); if (retval >= 0) { free_arg_pages(bprm); +#ifdef CONFIG_GRKERNSEC + if (old_exec_file) + fput(old_exec_file); +#endif + /* execve success */ security_bprm_free(bprm); acct_update_integrals(current); @@ -1558,6 +1602,13 @@ return retval; } +#ifdef CONFIG_GRKERNSEC + current->acl = old_acl; + memcpy(current->signal->rlim, old_rlim, sizeof(old_rlim)); + fput(current->exec_file); + current->exec_file = old_exec_file; +#endif + out: /* Something went wrong, return the inode and free the argument pages*/ for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -Naur linux-2.6.19.2.orig/fs/dcache.c linux-2.6.19.2/fs/dcache.c --- linux-2.6.19.2.orig/fs/dcache.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/dcache.c 2007-01-12 23:27:23.000000000 +0000 @@ -1739,7 +1739,7 @@ * * "buflen" should be positive. Caller holds the dcache_lock. */ -static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, +char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, struct dentry *root, struct vfsmount *rootmnt, char *buffer, int buflen) { diff -Naur linux-2.6.19.2.orig/fs/exec.c linux-2.6.19.2/fs/exec.c --- linux-2.6.19.2.orig/fs/exec.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/exec.c 2007-01-12 23:27:23.000000000 +0000 @@ -49,6 +49,8 @@ #include #include #include +#include +#include #include #include @@ -67,6 +69,15 @@ static struct linux_binfmt *formats; static DEFINE_RWLOCK(binfmt_lock); +#ifdef CONFIG_PAX_SOFTMODE +unsigned int pax_softmode; +#endif + +#ifdef CONFIG_PAX_HOOK_ACL_FLAGS +void (*pax_set_initial_flags_func)(struct linux_binprm * bprm); +EXPORT_SYMBOL(pax_set_initial_flags_func); +#endif + int register_binfmt(struct linux_binfmt * fmt) { struct linux_binfmt ** tmp = &formats; @@ -312,6 +323,10 @@ if (unlikely(anon_vma_prepare(vma))) goto out; +#ifdef CONFIG_PAX_SEGMEXEC + if (page_count(page) == 1) +#endif + flush_dcache_page(page); pte = get_locked_pte(mm, address, &ptl); if (!pte) @@ -321,9 +336,21 @@ goto out; } inc_mm_counter(mm, anon_rss); + +#ifdef CONFIG_PAX_SEGMEXEC + if (page_count(page) == 1) +#endif + lru_cache_add_active(page); set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( page, vma->vm_page_prot)))); + +#ifdef CONFIG_PAX_SEGMEXEC + if (page_count(page) != 1) + page_add_anon_rmap(page, vma, address); + else +#endif + page_add_new_anon_rmap(page, vma, address); pte_unmap_unlock(pte, ptl); @@ -346,6 +373,10 @@ int i, ret; long arg_size; +#ifdef CONFIG_PAX_SEGMEXEC + struct vm_area_struct *mpnt_m = NULL; +#endif + #ifdef CONFIG_STACK_GROWSUP /* Move the argument and environment strings to the bottom of the * stack space. @@ -404,11 +435,19 @@ bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); if (!mpnt) return -ENOMEM; - memset(mpnt, 0, sizeof(*mpnt)); +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (VM_STACK_FLAGS & VM_MAYEXEC)) { + mpnt_m = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + if (!mpnt_m) { + kmem_cache_free(vm_area_cachep, mpnt); + return -ENOMEM; + } + } +#endif down_write(&mm->mmap_sem); { @@ -430,13 +469,50 @@ else mpnt->vm_flags = VM_STACK_FLAGS; mpnt->vm_flags |= mm->def_flags; - mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7]; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) + if (!(mm->pax_flags & MF_PAX_PAGEEXEC)) + mpnt->vm_page_prot = protection_map[(mpnt->vm_flags | VM_EXEC) & (VM_READ|VM_WRITE|VM_EXEC)]; + else +#endif + + mpnt->vm_page_prot = protection_map[mpnt->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; if ((ret = insert_vm_struct(mm, mpnt))) { up_write(&mm->mmap_sem); kmem_cache_free(vm_area_cachep, mpnt); + +#ifdef CONFIG_PAX_SEGMEXEC + if (mpnt_m) + kmem_cache_free(vm_area_cachep, mpnt_m); +#endif + return ret; } mm->stack_vm = mm->total_vm = vma_pages(mpnt); + +#ifdef CONFIG_PAX_SEGMEXEC + if (mpnt_m) { + *mpnt_m = *mpnt; + if (!(mpnt->vm_flags & VM_EXEC)) { + mpnt_m->vm_flags &= ~(VM_READ | VM_WRITE | VM_EXEC); + mpnt_m->vm_page_prot = PAGE_NONE; + } + mpnt_m->vm_start += SEGMEXEC_TASK_SIZE; + mpnt_m->vm_end += SEGMEXEC_TASK_SIZE; + if ((ret = insert_vm_struct(mm, mpnt_m))) { + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, mpnt_m); + return ret; + } + mpnt_m->vm_flags |= VM_MIRROR; + mpnt->vm_flags |= VM_MIRROR; + mpnt_m->vm_mirror = mpnt->vm_start - mpnt_m->vm_start; + mpnt->vm_mirror = mpnt_m->vm_start - mpnt->vm_start; + mpnt_m->vm_pgoff = mpnt->vm_pgoff; + mm->total_vm += vma_pages(mpnt_m); + } +#endif + } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { @@ -444,6 +520,14 @@ if (page) { bprm->page[i] = NULL; install_arg_page(mpnt, page, stack_base); + +#ifdef CONFIG_PAX_SEGMEXEC + if (mpnt_m) { + page_cache_get(page); + install_arg_page(mpnt_m, page, stack_base + SEGMEXEC_TASK_SIZE); + } +#endif + } stack_base += PAGE_SIZE; } @@ -1128,6 +1212,11 @@ struct file *file; int retval; int i; +#ifdef CONFIG_GRKERNSEC + struct file *old_exec_file; + struct acl_subject_label *old_acl; + struct rlimit old_rlim[RLIM_NLIMITS]; +#endif retval = -ENOMEM; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); @@ -1139,10 +1228,29 @@ if (IS_ERR(file)) goto out_kfree; + gr_learn_resource(current, RLIMIT_NPROC, atomic_read(¤t->user->processes), 1); + + if (gr_handle_nproc()) { + allow_write_access(file); + fput(file); + return -EAGAIN; + } + + if (!gr_acl_handle_execve(file->f_dentry, file->f_vfsmnt)) { + allow_write_access(file); + fput(file); + return -EACCES; + } + sched_exec(); bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); +#ifdef CONFIG_PAX_RANDUSTACK + if (randomize_va_space) + bprm->p -= (pax_get_random_long() & ~(sizeof(void *)-1)) & ~PAGE_MASK; +#endif + bprm->file = file; bprm->filename = filename; bprm->interp = filename; @@ -1184,8 +1292,38 @@ if (retval < 0) goto out; + if (!gr_tpe_allow(file)) { + retval = -EACCES; + goto out; + } + + if (gr_check_crash_exec(file)) { + retval = -EACCES; + goto out; + } + + gr_log_chroot_exec(file->f_dentry, file->f_vfsmnt); + + gr_handle_exec_args(bprm, argv); + +#ifdef CONFIG_GRKERNSEC + old_acl = current->acl; + memcpy(old_rlim, current->signal->rlim, sizeof(old_rlim)); + old_exec_file = current->exec_file; + get_file(file); + current->exec_file = file; +#endif + + retval = gr_set_proc_label(file->f_dentry, file->f_vfsmnt); + if (retval < 0) + goto out_fail; + retval = search_binary_handler(bprm,regs); if (retval >= 0) { +#ifdef CONFIG_GRKERNSEC + if (old_exec_file) + fput(old_exec_file); +#endif free_arg_pages(bprm); /* execve success */ @@ -1195,6 +1333,14 @@ return retval; } +out_fail: +#ifdef CONFIG_GRKERNSEC + current->acl = old_acl; + memcpy(current->signal->rlim, old_rlim, sizeof(old_rlim)); + fput(current->exec_file); + current->exec_file = old_exec_file; +#endif + out: /* Something went wrong, return the inode and free the argument pages*/ for (i = 0 ; i < MAX_ARG_PAGES ; i++) { @@ -1355,6 +1501,114 @@ *out_ptr = 0; } +int pax_check_flags(unsigned long * flags) +{ + int retval = 0; + +#if !defined(__i386__) || !defined(CONFIG_PAX_SEGMEXEC) + if (*flags & MF_PAX_SEGMEXEC) + { + *flags &= ~MF_PAX_SEGMEXEC; + retval = -EINVAL; + } +#endif + + if ((*flags & MF_PAX_PAGEEXEC) + +#ifdef CONFIG_PAX_PAGEEXEC + && (*flags & MF_PAX_SEGMEXEC) +#endif + + ) + { + *flags &= ~MF_PAX_PAGEEXEC; + retval = -EINVAL; + } + + if ((*flags & MF_PAX_MPROTECT) + +#ifdef CONFIG_PAX_MPROTECT + && !(*flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) +#endif + + ) + { + *flags &= ~MF_PAX_MPROTECT; + retval = -EINVAL; + } + + if ((*flags & MF_PAX_EMUTRAMP) + +#ifdef CONFIG_PAX_EMUTRAMP + && !(*flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) +#endif + + ) + { + *flags &= ~MF_PAX_EMUTRAMP; + retval = -EINVAL; + } + + return retval; +} + +EXPORT_SYMBOL(pax_check_flags); + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +void pax_report_fault(struct pt_regs *regs, void *pc, void *sp) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = current->mm; + char* buffer_exec = (char*)__get_free_page(GFP_ATOMIC); + char* buffer_fault = (char*)__get_free_page(GFP_ATOMIC); + char* path_exec=NULL; + char* path_fault=NULL; + unsigned long start=0UL, end=0UL, offset=0UL; + + if (buffer_exec && buffer_fault) { + struct vm_area_struct* vma, * vma_exec=NULL, * vma_fault=NULL; + + down_read(&mm->mmap_sem); + vma = mm->mmap; + while (vma && (!vma_exec || !vma_fault)) { + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) + vma_exec = vma; + if (vma->vm_start <= (unsigned long)pc && (unsigned long)pc < vma->vm_end) + vma_fault = vma; + vma = vma->vm_next; + } + if (vma_exec) { + path_exec = d_path(vma_exec->vm_file->f_dentry, vma_exec->vm_file->f_vfsmnt, buffer_exec, PAGE_SIZE); + if (IS_ERR(path_exec)) + path_exec = ""; + } + if (vma_fault) { + start = vma_fault->vm_start; + end = vma_fault->vm_end; + offset = vma_fault->vm_pgoff << PAGE_SHIFT; + if (vma_fault->vm_file) { + path_fault = d_path(vma_fault->vm_file->f_dentry, vma_fault->vm_file->f_vfsmnt, buffer_fault, PAGE_SIZE); + if (IS_ERR(path_fault)) + path_fault = ""; + } else + path_fault = ""; + } + up_read(&mm->mmap_sem); + } + if (tsk->signal->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: execution attempt in: %s, %08lx-%08lx %08lx\n", NIPQUAD(tsk->signal->curr_ip), path_fault, start, end, offset); + else + printk(KERN_ERR "PAX: execution attempt in: %s, %08lx-%08lx %08lx\n", path_fault, start, end, offset); + printk(KERN_ERR "PAX: terminating task: %s(%s):%d, uid/euid: %u/%u, " + "PC: %p, SP: %p\n", path_exec, tsk->comm, tsk->pid, + tsk->uid, tsk->euid, pc, sp); + free_page((unsigned long)buffer_exec); + free_page((unsigned long)buffer_fault); + pax_report_insns(pc, sp); + do_coredump(SIGKILL, SIGKILL, regs); +} +#endif + static void zap_process(struct task_struct *start) { struct task_struct *t; @@ -1495,6 +1749,10 @@ */ clear_thread_flag(TIF_SIGPENDING); + if (signr == SIGKILL || signr == SIGILL) + gr_handle_brute_attach(current); + + gr_learn_resource(current, RLIMIT_CORE, binfmt->min_coredump, 1); if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) goto fail_unlock; diff -Naur linux-2.6.19.2.orig/fs/ext3/xattr.c linux-2.6.19.2/fs/ext3/xattr.c --- linux-2.6.19.2.orig/fs/ext3/xattr.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/ext3/xattr.c 2007-01-12 23:27:23.000000000 +0000 @@ -89,8 +89,8 @@ printk("\n"); \ } while (0) #else -# define ea_idebug(f...) -# define ea_bdebug(f...) +# define ea_idebug(f...) do {} while (0) +# define ea_bdebug(f...) do {} while (0) #endif static void ext3_xattr_cache_insert(struct buffer_head *); diff -Naur linux-2.6.19.2.orig/fs/ext4/extents.c linux-2.6.19.2/fs/ext4/extents.c --- linux-2.6.19.2.orig/fs/ext4/extents.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/ext4/extents.c 2007-01-12 23:27:23.000000000 +0000 @@ -1093,7 +1093,7 @@ return err; } -static int inline +inline static int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { @@ -1509,7 +1509,7 @@ * the caller should calculate credits under truncate_mutex and * pass the actual path. */ -int inline ext4_ext_calc_credits_for_insert(struct inode *inode, +inline int ext4_ext_calc_credits_for_insert(struct inode *inode, struct ext4_ext_path *path) { int depth, needed; @@ -1718,7 +1718,7 @@ * ext4_ext_more_to_rm: * returns 1 if current index has to be freed (even partial) */ -static int inline +inline static int ext4_ext_more_to_rm(struct ext4_ext_path *path) { BUG_ON(path->p_idx == NULL); diff -Naur linux-2.6.19.2.orig/fs/fcntl.c linux-2.6.19.2/fs/fcntl.c --- linux-2.6.19.2.orig/fs/fcntl.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/fcntl.c 2007-01-12 23:27:23.000000000 +0000 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,7 @@ struct fdtable *fdt; error = -EINVAL; + gr_learn_resource(current, RLIMIT_NOFILE, orig_start, 0); if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; @@ -83,6 +85,7 @@ } error = -EMFILE; + gr_learn_resource(current, RLIMIT_NOFILE, newfd, 0); if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; @@ -141,6 +144,8 @@ struct files_struct * files = current->files; struct fdtable *fdt; + gr_learn_resource(current, RLIMIT_NOFILE, newfd, 0); + spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) goto out_unlock; @@ -459,7 +464,8 @@ return (((fown->euid == 0) || (fown->euid == p->suid) || (fown->euid == p->uid) || (fown->uid == p->suid) || (fown->uid == p->uid)) && - !security_file_send_sigiotask(p, fown, sig)); + !security_file_send_sigiotask(p, fown, sig) && + !gr_check_protected_task(p) && !gr_pid_is_chrooted(p)); } static void send_sigio_to_task(struct task_struct *p, diff -Naur linux-2.6.19.2.orig/fs/namei.c linux-2.6.19.2/fs/namei.c --- linux-2.6.19.2.orig/fs/namei.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/namei.c 2007-01-12 23:27:23.000000000 +0000 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -640,6 +641,13 @@ err = security_inode_follow_link(path->dentry, nd); if (err) goto loop; + + if (gr_handle_follow_link(path->dentry->d_parent->d_inode, + path->dentry->d_inode, path->dentry, nd->mnt)) { + err = -EACCES; + goto loop; + } + current->link_count++; current->total_link_count++; nd->depth++; @@ -985,11 +993,18 @@ break; } return_base: + if (!gr_acl_handle_hidden_file(nd->dentry, nd->mnt)) { + path_release(nd); + return -ENOENT; + } return 0; out_dput: dput_path(&next, nd); break; } + if (!gr_acl_handle_hidden_file(nd->dentry, nd->mnt)) + err = -ENOENT; + path_release(nd); return_err: return err; @@ -1601,9 +1616,17 @@ int error; struct dentry *dir = nd->dentry; + if (!gr_acl_handle_creat(path->dentry, nd->dentry, nd->mnt, flag, mode)) { + error = -EACCES; + goto out_unlock_dput; + } + if (!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; error = vfs_create(dir->d_inode, path->dentry, mode, nd); + if (!error) + gr_handle_create(path->dentry, nd->mnt); +out_unlock_dput: mutex_unlock(&dir->d_inode->i_mutex); dput(nd->dentry); nd->dentry = path->dentry; @@ -1654,6 +1677,17 @@ nd, flag); if (error) return error; + + if (gr_handle_rawio(nd->dentry->d_inode)) { + error = -EPERM; + goto exit; + } + + if (!gr_acl_handle_open(nd->dentry, nd->mnt, flag)) { + error = -EACCES; + goto exit; + } + goto ok; } @@ -1703,6 +1737,23 @@ /* * It already exists. */ + + if (gr_handle_rawio(path.dentry->d_inode)) { + mutex_unlock(&dir->d_inode->i_mutex); + error = -EPERM; + goto exit_dput; + } + if (!gr_acl_handle_open(path.dentry, nd->mnt, flag)) { + mutex_unlock(&dir->d_inode->i_mutex); + error = -EACCES; + goto exit_dput; + } + if (gr_handle_fifo(path.dentry, nd->mnt, dir, flag, acc_mode)) { + mutex_unlock(&dir->d_inode->i_mutex); + error = -EACCES; + goto exit_dput; + } + mutex_unlock(&dir->d_inode->i_mutex); audit_inode_update(path.dentry->d_inode); @@ -1758,6 +1809,13 @@ error = security_inode_follow_link(path.dentry, nd); if (error) goto exit_dput; + + if (gr_handle_follow_link(path.dentry->d_parent->d_inode, path.dentry->d_inode, + path.dentry, nd->mnt)) { + error = -EACCES; + goto exit_dput; + } + error = __do_follow_link(&path, nd); if (error) { /* Does someone understand code flow here? Or it is only @@ -1886,6 +1944,22 @@ if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; if (!IS_ERR(dentry)) { + if (gr_handle_chroot_mknod(dentry, nd.mnt, mode)) { + error = -EPERM; + dput(dentry); + mutex_unlock(&nd.dentry->d_inode->i_mutex); + path_release(&nd); + goto out; + } + + if (!gr_acl_handle_mknod(dentry, nd.dentry, nd.mnt, mode)) { + error = -EACCES; + dput(dentry); + mutex_unlock(&nd.dentry->d_inode->i_mutex); + path_release(&nd); + goto out; + } + switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); @@ -1903,6 +1977,10 @@ default: error = -EINVAL; } + + if (!error) + gr_handle_create(dentry, nd.mnt); + dput(dentry); } mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -1960,9 +2038,18 @@ if (IS_ERR(dentry)) goto out_unlock; + if (!gr_acl_handle_mkdir(dentry, nd.dentry, nd.mnt)) { + error = -EACCES; + goto out_unlock_dput; + } + if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); + + if (!error) + gr_handle_create(dentry, nd.mnt); +out_unlock_dput: dput(dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2045,6 +2132,8 @@ char * name; struct dentry *dentry; struct nameidata nd; + ino_t saved_ino = 0; + dev_t saved_dev = 0; name = getname(pathname); if(IS_ERR(name)) @@ -2070,7 +2159,22 @@ error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit2; + + if (dentry->d_inode != NULL) { + if (dentry->d_inode->i_nlink <= 1) { + saved_ino = dentry->d_inode->i_ino; + saved_dev = dentry->d_inode->i_sb->s_dev; + } + + if (!gr_acl_handle_rmdir(dentry, nd.mnt)) { + error = -EACCES; + goto dput_exit2; + } + } error = vfs_rmdir(nd.dentry->d_inode, dentry); + if (!error && (saved_dev || saved_ino)) + gr_handle_delete(saved_ino, saved_dev); +dput_exit2: dput(dentry); exit2: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2129,6 +2233,8 @@ struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; + ino_t saved_ino = 0; + dev_t saved_dev = 0; name = getname(pathname); if(IS_ERR(name)) @@ -2144,13 +2250,26 @@ dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { + error = 0; /* Why not before? Because we want correct error value */ if (nd.last.name[nd.last.len]) goto slashes; inode = dentry->d_inode; - if (inode) + if (inode) { + if (inode->i_nlink <= 1) { + saved_ino = inode->i_ino; + saved_dev = inode->i_sb->s_dev; + } + + if (!gr_acl_handle_unlink(dentry, nd.mnt)) + error = -EACCES; + atomic_inc(&inode->i_count); - error = vfs_unlink(nd.dentry->d_inode, dentry); + } + if (!error) + error = vfs_unlink(nd.dentry->d_inode, dentry); + if (!error && (saved_ino || saved_dev)) + gr_handle_delete(saved_ino, saved_dev); exit2: dput(dentry); } @@ -2231,7 +2350,16 @@ if (IS_ERR(dentry)) goto out_unlock; + if (!gr_acl_handle_symlink(dentry, nd.dentry, nd.mnt, from)) { + error = -EACCES; + goto out_dput_unlock; + } + error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); + + if (!error) + gr_handle_create(dentry, nd.mnt); +out_dput_unlock: dput(dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2326,7 +2454,25 @@ error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out_unlock; + + if (gr_handle_hardlink(old_nd.dentry, old_nd.mnt, + old_nd.dentry->d_inode, + old_nd.dentry->d_inode->i_mode, to)) { + error = -EACCES; + goto out_unlock_dput; + } + + if (!gr_acl_handle_link(new_dentry, nd.dentry, nd.mnt, + old_nd.dentry, old_nd.mnt, to)) { + error = -EACCES; + goto out_unlock_dput; + } + error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); + + if (!error) + gr_handle_create(new_dentry, nd.mnt); +out_unlock_dput: dput(new_dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2552,8 +2698,16 @@ if (new_dentry == trap) goto exit5; - error = vfs_rename(old_dir->d_inode, old_dentry, + error = gr_acl_handle_rename(new_dentry, newnd.dentry, newnd.mnt, + old_dentry, old_dir->d_inode, oldnd.mnt, + newname); + + if (!error) + error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); + if (!error) + gr_handle_rename(old_dir->d_inode, newnd.dentry->d_inode, old_dentry, + new_dentry, oldnd.mnt, new_dentry->d_inode ? 1 : 0); exit5: dput(new_dentry); exit4: diff -Naur linux-2.6.19.2.orig/fs/namespace.c linux-2.6.19.2/fs/namespace.c --- linux-2.6.19.2.orig/fs/namespace.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/namespace.c 2007-01-12 23:27:23.000000000 +0000 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include "pnode.h" @@ -598,6 +599,8 @@ DQUOT_OFF(sb); retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); unlock_kernel(); + + gr_log_remount(mnt->mnt_devname, retval); } up_write(&sb->s_umount); return retval; @@ -618,6 +621,9 @@ security_sb_umount_busy(mnt); up_write(&namespace_sem); release_mounts(&umount_list); + + gr_log_unmount(mnt->mnt_devname, retval); + return retval; } @@ -1418,6 +1424,11 @@ if (retval) goto dput_out; + if (gr_handle_chroot_mount(nd.dentry, nd.mnt, dev_name)) { + retval = -EPERM; + goto dput_out; + } + if (flags & MS_REMOUNT) retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, data_page); @@ -1432,6 +1443,9 @@ dev_name, data_page); dput_out: path_release(&nd); + + gr_log_mount(dev_name, dir_name, retval); + return retval; } @@ -1684,6 +1698,9 @@ if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (gr_handle_chroot_pivot()) + return -EPERM; + lock_kernel(); error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, diff -Naur linux-2.6.19.2.orig/fs/open.c linux-2.6.19.2/fs/open.c --- linux-2.6.19.2.orig/fs/open.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/open.c 2007-01-12 23:27:23.000000000 +0000 @@ -27,6 +27,7 @@ #include #include #include +#include int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { @@ -204,6 +205,9 @@ if (length < 0) return -EINVAL; + if (filp && !gr_acl_handle_truncate(dentry, filp->f_vfsmnt)) + return -EACCES; + newattrs.ia_size = length; newattrs.ia_valid = ATTR_SIZE | time_attrs; if (filp) { @@ -398,6 +402,9 @@ if(IS_RDONLY(nd.dentry->d_inode)) res = -EROFS; + if (!res && !gr_acl_handle_access(nd.dentry, nd.mnt, mode)) + res = -EACCES; + out_path_release: path_release(&nd); out: @@ -427,6 +434,8 @@ if (error) goto dput_and_out; + gr_log_chdir(nd.dentry, nd.mnt); + set_fs_pwd(current->fs, nd.mnt, nd.dentry); dput_and_out: @@ -457,6 +466,13 @@ goto out_putf; error = file_permission(file, MAY_EXEC); + + if (!error && !gr_chroot_fchdir(dentry, mnt)) + error = -EPERM; + + if (!error) + gr_log_chdir(dentry, mnt); + if (!error) set_fs_pwd(current->fs, mnt, dentry); out_putf: @@ -482,8 +498,16 @@ if (!capable(CAP_SYS_CHROOT)) goto dput_and_out; + if (gr_handle_chroot_chroot(nd.dentry, nd.mnt)) + goto dput_and_out; + set_fs_root(current->fs, nd.mnt, nd.dentry); set_fs_altroot(); + + gr_handle_chroot_caps(current); + + gr_handle_chroot_chdir(nd.dentry, nd.mnt); + error = 0; dput_and_out: path_release(&nd); @@ -514,9 +538,22 @@ err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out_putf; + + if (!gr_acl_handle_fchmod(dentry, file->f_vfsmnt, mode)) { + err = -EACCES; + goto out_putf; + } + mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; + + if (gr_handle_chroot_chmod(dentry, file->f_vfsmnt, mode)) { + err = -EPERM; + mutex_unlock(&inode->i_mutex); + goto out_putf; + } + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; err = notify_change(dentry, &newattrs); @@ -549,9 +586,21 @@ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto dput_and_out; + if (!gr_acl_handle_chmod(nd.dentry, nd.mnt, mode)) { + error = -EACCES; + goto dput_and_out; + }; + mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; + + if (gr_handle_chroot_chmod(nd.dentry, nd.mnt, mode)) { + error = -EACCES; + mutex_unlock(&inode->i_mutex); + goto dput_and_out; + } + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(nd.dentry, &newattrs); @@ -568,7 +617,7 @@ return sys_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +static int chown_common(struct dentry * dentry, uid_t user, gid_t group, struct vfsmount *mnt) { struct inode * inode; int error; @@ -585,6 +634,12 @@ error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out; + + if (!gr_acl_handle_chown(dentry, mnt)) { + error = -EACCES; + goto out; + } + newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { newattrs.ia_valid |= ATTR_UID; @@ -611,7 +666,7 @@ error = user_path_walk(filename, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); + error = chown_common(nd.dentry, user, group, nd.mnt); path_release(&nd); out: return error; @@ -631,7 +686,7 @@ error = __user_walk_fd(dfd, filename, follow, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); + error = chown_common(nd.dentry, user, group, nd.mnt); path_release(&nd); out: return error; @@ -645,7 +700,7 @@ error = user_path_walk_link(filename, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); + error = chown_common(nd.dentry, user, group, nd.mnt); path_release(&nd); out: return error; @@ -664,7 +719,7 @@ dentry = file->f_dentry; audit_inode(NULL, dentry->d_inode); - error = chown_common(dentry, user, group); + error = chown_common(dentry, user, group, file->f_vfsmnt); fput(file); out: return error; @@ -872,6 +927,7 @@ * N.B. For clone tasks sharing a files structure, this test * will limit the total number of files that can be opened. */ + gr_learn_resource(current, RLIMIT_NOFILE, fd, 0); if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; diff -Naur linux-2.6.19.2.orig/fs/partitions/efi.c linux-2.6.19.2/fs/partitions/efi.c --- linux-2.6.19.2.orig/fs/partitions/efi.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/partitions/efi.c 2007-01-12 23:27:23.000000000 +0000 @@ -99,7 +99,7 @@ #ifdef EFI_DEBUG #define Dprintk(x...) printk(KERN_DEBUG x) #else -#define Dprintk(x...) +#define Dprintk(x...) do {} while (0) #endif /* This allows a kernel command line option 'gpt' to override diff -Naur linux-2.6.19.2.orig/fs/pipe.c linux-2.6.19.2/fs/pipe.c --- linux-2.6.19.2.orig/fs/pipe.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/pipe.c 2007-01-12 23:27:23.000000000 +0000 @@ -827,7 +827,7 @@ inode->i_pipe = NULL; } -static struct vfsmount *pipe_mnt __read_mostly; +struct vfsmount *pipe_mnt __read_mostly; static int pipefs_delete_dentry(struct dentry *dentry) { return 1; diff -Naur linux-2.6.19.2.orig/fs/proc/array.c linux-2.6.19.2/fs/proc/array.c --- linux-2.6.19.2.orig/fs/proc/array.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/proc/array.c 2007-01-12 23:27:23.000000000 +0000 @@ -291,6 +291,21 @@ cap_t(p->cap_effective)); } +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) +static inline char *task_pax(struct task_struct *p, char *buffer) +{ + if (p->mm) + return buffer + sprintf(buffer, "PaX:\t%c%c%c%c%c\n", + p->mm->pax_flags & MF_PAX_PAGEEXEC ? 'P' : 'p', + p->mm->pax_flags & MF_PAX_EMUTRAMP ? 'E' : 'e', + p->mm->pax_flags & MF_PAX_MPROTECT ? 'M' : 'm', + p->mm->pax_flags & MF_PAX_RANDMMAP ? 'R' : 'r', + p->mm->pax_flags & MF_PAX_SEGMEXEC ? 'S' : 's'); + else + return buffer + sprintf(buffer, "PaX:\t-----\n"); +} +#endif + int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; @@ -309,9 +324,20 @@ #if defined(CONFIG_S390) buffer = task_show_regs(task, buffer); #endif + +#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) + buffer = task_pax(task, buffer); +#endif + return buffer - orig; } +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP +#define PAX_RAND_FLAGS(_mm) (_mm != NULL && _mm != current->mm && \ + (_mm->pax_flags & MF_PAX_RANDMMAP || \ + _mm->pax_flags & MF_PAX_SEGMEXEC)) +#endif + static int do_task_stat(struct task_struct *task, char * buffer, int whole) { unsigned long vsize, eip, esp, wchan = ~0UL; @@ -406,6 +432,19 @@ stime = task->stime; } +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + if (PAX_RAND_FLAGS(mm)) { + eip = 0; + esp = 0; + wchan = 0; + } +#endif +#ifdef CONFIG_GRKERNSEC_HIDESYM + wchan = 0; + eip =0; + esp =0; +#endif + /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" Unix priority/nice value */ priority = task_prio(task); @@ -445,9 +484,15 @@ vsize, mm ? get_mm_rss(mm) : 0, rsslim, +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + PAX_RAND_FLAGS(mm) ? 1 : (mm ? mm->start_code : 0), + PAX_RAND_FLAGS(mm) ? 1 : (mm ? mm->end_code : 0), + PAX_RAND_FLAGS(mm) ? 0 : (mm ? mm->start_stack : 0), +#else mm ? mm->start_code : 0, mm ? mm->end_code : 0, mm ? mm->start_stack : 0, +#endif esp, eip, /* The signal information here is obsolete. @@ -494,3 +539,14 @@ return sprintf(buffer,"%d %d %d %d %d %d %d\n", size, resident, shared, text, lib, data, 0); } + +#ifdef CONFIG_GRKERNSEC_PROC_IPADDR +int proc_pid_ipaddr(struct task_struct *task, char * buffer) +{ + int len; + + len = sprintf(buffer, "%u.%u.%u.%u\n", NIPQUAD(task->signal->curr_ip)); + return len; +} +#endif + diff -Naur linux-2.6.19.2.orig/fs/proc/base.c linux-2.6.19.2/fs/proc/base.c --- linux-2.6.19.2.orig/fs/proc/base.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/proc/base.c 2007-01-12 23:27:23.000000000 +0000 @@ -73,6 +73,7 @@ #include #include #include +#include #include "internal.h" /* NOTE: @@ -194,7 +195,7 @@ (task->parent == current && \ (task->ptrace & PT_PTRACED) && \ (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ - security_ptrace(current,task) == 0)) + security_ptrace(current,task) == 0 && !gr_handle_proc_ptrace(task))) static int proc_pid_environ(struct task_struct *task, char * buffer) { @@ -330,6 +331,8 @@ task = get_proc_task(inode); if (task) { allowed = ptrace_may_attach(task); + if (allowed != 0) + allowed = !gr_acl_handle_procpidmem(task); put_task_struct(task); } return allowed; @@ -521,7 +524,7 @@ if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task) || gr_acl_handle_procpidmem(task)) goto out; ret = -ENOMEM; @@ -591,7 +594,7 @@ if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task) || gr_acl_handle_procpidmem(task)) goto out; copied = -ENOMEM; @@ -969,7 +972,11 @@ inode->i_gid = 0; if (task_dumpable(task)) { inode->i_uid = task->euid; +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; +#else inode->i_gid = task->egid; +#endif } security_task_to_inode(task, inode); @@ -985,17 +992,38 @@ { struct inode *inode = dentry->d_inode; struct task_struct *task; +#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + struct task_struct *tmp = current; +#endif + generic_fillattr(inode, stat); rcu_read_lock(); stat->uid = 0; stat->gid = 0; task = pid_task(proc_pid(inode), PIDTYPE_PID); - if (task) { + if (task && +#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + (!tmp->uid || (tmp->uid == task->uid) +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + || in_group_p(CONFIG_GRKERNSEC_PROC_GID) +#endif + ) && +#endif + !gr_pid_is_chrooted(task) && !gr_check_hidden_task(task)) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || +#ifdef CONFIG_GRKERNSEC_PROC_USER + (inode->i_mode == (S_IFDIR|S_IRUSR|S_IXUSR)) || +#elif CONFIG_GRKERNSEC_PROC_USERGROUP + (inode->i_mode == (S_IFDIR|S_IRUSR|S_IRGRP|S_IXUSR|S_IXGRP)) || +#endif task_dumpable(task)) { stat->uid = task->euid; +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + stat->gid = CONFIG_GRKERNSEC_PROC_GID; +#else stat->gid = task->egid; +#endif } } rcu_read_unlock(); @@ -1025,9 +1053,18 @@ struct task_struct *task = get_proc_task(inode); if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || +#ifdef CONFIG_GRKERNSEC_PROC_USER + (inode->i_mode == (S_IFDIR|S_IRUSR|S_IXUSR)) || +#elif CONFIG_GRKERNSEC_PROC_USERGROUP + (inode->i_mode == (S_IFDIR|S_IRUSR|S_IRGRP|S_IXUSR|S_IXGRP)) || +#endif task_dumpable(task)) { inode->i_uid = task->euid; +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; +#else inode->i_gid = task->egid; +#endif } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1273,6 +1310,9 @@ if (fd == ~0U) goto out; + if (gr_acl_handle_procpidmem(task)) + goto out; + result = proc_fd_instantiate(dir, dentry, task, &fd); out: put_task_struct(task); @@ -1317,6 +1357,8 @@ goto out; filp->f_pos++; default: + if (gr_acl_handle_procpidmem(p)) + goto out; files = get_files_struct(p); if (!files) goto out; @@ -1791,6 +1833,9 @@ #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, loginuid), #endif +#ifdef CONFIG_GRKERNSEC_PROC_IPADDR + INF("ipaddr", S_IRUSR, pid_ipaddr), +#endif }; static int proc_tgid_base_readdir(struct file * filp, @@ -1893,7 +1938,14 @@ if (!inode) goto out; +#ifdef CONFIG_GRKERNSEC_PROC_USER + inode->i_mode = S_IFDIR|S_IRUSR|S_IXUSR; +#elif CONFIG_GRKERNSEC_PROC_USERGROUP + inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; + inode->i_mode = S_IFDIR|S_IRUSR|S_IRGRP|S_IXUSR|S_IXGRP; +#else inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; +#endif inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; @@ -1934,7 +1986,11 @@ if (!task) goto out; + if (gr_check_hidden_task(task)) + goto out_put_task; + result = proc_pid_instantiate(dir, dentry, task, NULL); +out_put_task: put_task_struct(task); out: return result; @@ -1992,6 +2048,9 @@ { unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; struct task_struct *reaper = get_proc_task(filp->f_dentry->d_inode); +#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + struct task_struct *tmp = current; +#endif struct task_struct *task; int tgid; @@ -2009,6 +2068,18 @@ task; put_task_struct(task), task = next_tgid(tgid + 1)) { tgid = task->pid; + + if (gr_pid_is_chrooted(task) || gr_check_hidden_task(task) +#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + || (tmp->uid && (task->uid != tmp->uid) +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + && !in_group_p(CONFIG_GRKERNSEC_PROC_GID) +#endif + ) +#endif + ) + continue; + filp->f_pos = tgid + TGID_OFFSET; if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) { put_task_struct(task); diff -Naur linux-2.6.19.2.orig/fs/proc/inode.c linux-2.6.19.2/fs/proc/inode.c --- linux-2.6.19.2.orig/fs/proc/inode.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/proc/inode.c 2007-01-12 23:27:23.000000000 +0000 @@ -166,7 +166,11 @@ if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; +#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP + inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; +#else inode->i_gid = de->gid; +#endif } if (de->size) inode->i_size = de->size; diff -Naur linux-2.6.19.2.orig/fs/proc/internal.h linux-2.6.19.2/fs/proc/internal.h --- linux-2.6.19.2.orig/fs/proc/internal.h 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/proc/internal.h 2007-01-12 23:27:23.000000000 +0000 @@ -37,6 +37,9 @@ extern int proc_tgid_stat(struct task_struct *, char *); extern int proc_pid_status(struct task_struct *, char *); extern int proc_pid_statm(struct task_struct *, char *); +#ifdef CONFIG_GRKERNSEC_PROC_IPADDR +extern int proc_pid_ipaddr(struct task_struct*,char*); +#endif extern struct file_operations proc_maps_operations; extern struct file_operations proc_numa_maps_operations; diff -Naur linux-2.6.19.2.orig/fs/proc/proc_misc.c linux-2.6.19.2/fs/proc/proc_misc.c --- linux-2.6.19.2.orig/fs/proc/proc_misc.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/proc/proc_misc.c 2007-01-12 23:27:23.000000000 +0000 @@ -670,6 +670,8 @@ void __init proc_misc_init(void) { struct proc_dir_entry *entry; + int gr_mode = 0; + static struct { char *name; int (*read_proc)(char*,char**,off_t,int,int*,void*); @@ -685,7 +687,9 @@ {"stram", stram_read_proc}, #endif {"filesystems", filesystems_read_proc}, +#ifndef CONFIG_GRKERNSEC_PROC_ADD {"cmdline", cmdline_read_proc}, +#endif {"locks", locks_read_proc}, {"execdomains", execdomains_read_proc}, {NULL,} @@ -693,13 +697,26 @@ for (p = simple_ones; p->name; p++) create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL); +#ifdef CONFIG_GRKERNSEC_PROC_USER + gr_mode = S_IRUSR; +#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + gr_mode = S_IRUSR | S_IRGRP; +#endif +#ifdef CONFIG_GRKERNSEC_PROC_ADD + create_proc_read_entry("cmdline", gr_mode, NULL, &cmdline_read_proc, NULL); +#endif + proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); if (entry) entry->proc_fops = &proc_kmsg_operations; +#ifdef CONFIG_GRKERNSEC_PROC_ADD + create_seq_entry("devices", gr_mode, &proc_devinfo_operations); +#else create_seq_entry("devices", 0, &proc_devinfo_operations); +#endif create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK create_seq_entry("partitions", 0, &proc_partitions_operations); @@ -707,7 +724,11 @@ create_seq_entry("stat", 0, &proc_stat_operations); create_seq_entry("interrupts", 0, &proc_interrupts_operations); #ifdef CONFIG_SLAB +#ifdef CONFIG_GRKERNSEC_PROC_ADD + create_seq_entry("slabinfo",S_IWUSR|gr_mode,&proc_slabinfo_operations); +#else create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); +#endif #ifdef CONFIG_DEBUG_SLAB_LEAK create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); #endif @@ -724,7 +745,7 @@ #ifdef CONFIG_SCHEDSTATS create_seq_entry("schedstat", 0, &proc_schedstat_operations); #endif -#ifdef CONFIG_PROC_KCORE +#if defined(CONFIG_PROC_KCORE) && !defined(CONFIG_GRKERNSEC_PROC_ADD) proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); if (proc_root_kcore) { proc_root_kcore->proc_fops = &proc_kcore_operations; diff -Naur linux-2.6.19.2.orig/fs/proc/root.c linux-2.6.19.2/fs/proc/root.c --- linux-2.6.19.2.orig/fs/proc/root.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/proc/root.c 2007-01-12 23:27:23.000000000 +0000 @@ -64,7 +64,13 @@ return; } proc_misc_init(); +#ifdef CONFIG_GRKERNSEC_PROC_USER + proc_net = proc_mkdir_mode("net", S_IRUSR | S_IXUSR, NULL); +#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + proc_net = proc_mkdir_mode("net", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, NULL); +#else proc_net = proc_mkdir("net", NULL); +#endif proc_net_stat = proc_mkdir("net/stat", NULL); #ifdef CONFIG_SYSVIPC @@ -88,7 +94,15 @@ #ifdef CONFIG_PROC_DEVICETREE proc_device_tree_init(); #endif +#ifdef CONFIG_GRKERNSEC_PROC_ADD +#ifdef CONFIG_GRKERNSEC_PROC_USER + proc_bus = proc_mkdir_mode("bus", S_IRUSR | S_IXUSR, NULL); +#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) + proc_bus = proc_mkdir_mode("bus", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, NULL); +#endif +#else proc_bus = proc_mkdir("bus", NULL); +#endif } static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat diff -Naur linux-2.6.19.2.orig/fs/proc/task_mmu.c linux-2.6.19.2/fs/proc/task_mmu.c --- linux-2.6.19.2.orig/fs/proc/task_mmu.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/proc/task_mmu.c 2007-01-12 23:27:23.000000000 +0000 @@ -43,15 +43,27 @@ "VmStk:\t%8lu kB\n" "VmExe:\t%8lu kB\n" "VmLib:\t%8lu kB\n" - "VmPTE:\t%8lu kB\n", - hiwater_vm << (PAGE_SHIFT-10), + "VmPTE:\t%8lu kB\n" + +#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT + "CsBase:\t%8lx\nCsLim:\t%8lx\n" +#endif + + ,hiwater_vm << (PAGE_SHIFT-10), (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), mm->locked_vm << (PAGE_SHIFT-10), hiwater_rss << (PAGE_SHIFT-10), total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10); + (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10 + +#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT + , mm->context.user_cs_base, mm->context.user_cs_limit +#endif + + ); + return buffer; } @@ -122,6 +134,12 @@ unsigned long private_dirty; }; +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP +#define PAX_RAND_FLAGS(_mm) (_mm != NULL && _mm != current->mm && \ + (_mm->pax_flags & MF_PAX_RANDMMAP || \ + _mm->pax_flags & MF_PAX_SEGMEXEC)) +#endif + static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) { struct proc_maps_private *priv = m->private; @@ -141,13 +159,30 @@ } seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + PAX_RAND_FLAGS(mm) ? 0UL : vma->vm_start, + PAX_RAND_FLAGS(mm) ? 0UL : vma->vm_end, +#else vma->vm_start, vma->vm_end, +#endif + +#if 0 + flags & VM_MAYREAD ? flags & VM_READ ? 'R' : '+' : flags & VM_READ ? 'r' : '-', + flags & VM_MAYWRITE ? flags & VM_WRITE ? 'W' : '+' : flags & VM_WRITE ? 'w' : '-', + flags & VM_MAYEXEC ? flags & VM_EXEC ? 'X' : '+' : flags & VM_EXEC ? 'x' : '-', +#else flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', +#endif + flags & VM_MAYSHARE ? 's' : 'p', +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + PAX_RAND_FLAGS(mm) ? 0UL : vma->vm_pgoff << PAGE_SHIFT, +#else vma->vm_pgoff << PAGE_SHIFT, +#endif MAJOR(dev), MINOR(dev), ino, &len); /* @@ -161,11 +196,11 @@ const char *name = arch_vma_name(vma); if (!name) { if (mm) { - if (vma->vm_start <= mm->start_brk && - vma->vm_end >= mm->brk) { + if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { name = "[heap]"; - } else if (vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack) { + } else if ((vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP)) || + (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack)) { name = "[stack]"; } } else { @@ -179,7 +214,25 @@ } seq_putc(m, '\n'); - if (mss) + + if (mss) { +#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP + if (PAX_RAND_FLAGS(mm)) + seq_printf(m, + "Size: %8lu kB\n" + "Rss: %8lu kB\n" + "Shared_Clean: %8lu kB\n" + "Shared_Dirty: %8lu kB\n" + "Private_Clean: %8lu kB\n" + "Private_Dirty: %8lu kB\n", + 0UL, + 0UL, + 0UL, + 0UL, + 0UL, + 0UL); + else +#endif seq_printf(m, "Size: %8lu kB\n" "Rss: %8lu kB\n" @@ -193,6 +246,7 @@ mss->shared_dirty >> 10, mss->private_clean >> 10, mss->private_dirty >> 10); + } if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; diff -Naur linux-2.6.19.2.orig/fs/readdir.c linux-2.6.19.2/fs/readdir.c --- linux-2.6.19.2.orig/fs/readdir.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/readdir.c 2007-01-12 23:27:23.000000000 +0000 @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include @@ -65,6 +67,7 @@ struct readdir_callback { struct old_linux_dirent __user * dirent; + struct file * file; int result; }; @@ -80,6 +83,10 @@ d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) return -EOVERFLOW; + + if (!gr_acl_handle_filldir(buf->file, name, namlen, ino)) + return 0; + buf->result++; dirent = buf->dirent; if (!access_ok(VERIFY_WRITE, dirent, @@ -111,6 +118,7 @@ buf.result = 0; buf.dirent = dirent; + buf.file = file; error = vfs_readdir(file, fillonedir, &buf); if (error >= 0) @@ -137,6 +145,7 @@ struct getdents_callback { struct linux_dirent __user * current_dir; struct linux_dirent __user * previous; + struct file * file; int count; int error; }; @@ -155,6 +164,10 @@ d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) return -EOVERFLOW; + + if (!gr_acl_handle_filldir(buf->file, name, namlen, ino)) + return 0; + dirent = buf->previous; if (dirent) { if (__put_user(offset, &dirent->d_off)) @@ -201,6 +214,7 @@ buf.previous = NULL; buf.count = count; buf.error = 0; + buf.file = file; error = vfs_readdir(file, filldir, &buf); if (error < 0) @@ -225,6 +239,7 @@ struct getdents_callback64 { struct linux_dirent64 __user * current_dir; struct linux_dirent64 __user * previous; + struct file *file; int count; int error; }; @@ -239,6 +254,10 @@ buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; + + if (!gr_acl_handle_filldir(buf->file, name, namlen, ino)) + return 0; + dirent = buf->previous; if (dirent) { if (__put_user(offset, &dirent->d_off)) @@ -285,6 +304,7 @@ buf.current_dir = dirent; buf.previous = NULL; + buf.file = file; buf.count = count; buf.error = 0; diff -Naur linux-2.6.19.2.orig/fs/udf/balloc.c linux-2.6.19.2/fs/udf/balloc.c --- linux-2.6.19.2.orig/fs/udf/balloc.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/udf/balloc.c 2007-01-12 23:27:23.000000000 +0000 @@ -153,8 +153,7 @@ unsigned long overflow; mutex_lock(&sbi->s_alloc_mutex); - if (bloc.logicalBlockNum < 0 || - (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) + if (bloc.logicalBlockNum + count > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) { udf_debug("%d < %d || %d + %d > %d\n", bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, @@ -227,7 +226,7 @@ struct buffer_head *bh; mutex_lock(&sbi->s_alloc_mutex); - if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition)) + if (first_block >= UDF_SB_PARTLEN(sb, partition)) goto out; if (first_block + block_count > UDF_SB_PARTLEN(sb, partition)) @@ -294,7 +293,7 @@ mutex_lock(&sbi->s_alloc_mutex); repeat: - if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition)) + if (goal >= UDF_SB_PARTLEN(sb, partition)) goal = 0; nr_groups = bitmap->s_nr_groups; @@ -434,8 +433,7 @@ int i; mutex_lock(&sbi->s_alloc_mutex); - if (bloc.logicalBlockNum < 0 || - (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) + if (bloc.logicalBlockNum + count > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) { udf_debug("%d < %d || %d + %d > %d\n", bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, @@ -682,7 +680,7 @@ struct buffer_head *bh; int8_t etype = -1; - if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition)) + if (first_block >= UDF_SB_PARTLEN(sb, partition)) return 0; if (UDF_I_ALLOCTYPE(table) == ICBTAG_FLAG_AD_SHORT) @@ -762,7 +760,7 @@ return newblock; mutex_lock(&sbi->s_alloc_mutex); - if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition)) + if (goal >= UDF_SB_PARTLEN(sb, partition)) goal = 0; /* We search for the closest matching block to goal. If we find a exact hit, diff -Naur linux-2.6.19.2.orig/fs/udf/inode.c linux-2.6.19.2/fs/udf/inode.c --- linux-2.6.19.2.orig/fs/udf/inode.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/udf/inode.c 2007-01-12 23:27:23.000000000 +0000 @@ -300,9 +300,6 @@ lock_kernel(); - if (block < 0) - goto abort_negative; - if (block == UDF_I_NEXT_ALLOC_BLOCK(inode) + 1) { UDF_I_NEXT_ALLOC_BLOCK(inode) ++; @@ -323,10 +320,6 @@ abort: unlock_kernel(); return err; - -abort_negative: - udf_warning(inode->i_sb, "udf_get_block", "block < 0"); - goto abort; } static struct buffer_head * diff -Naur linux-2.6.19.2.orig/fs/ufs/inode.c linux-2.6.19.2/fs/ufs/inode.c --- linux-2.6.19.2.orig/fs/ufs/inode.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/ufs/inode.c 2007-01-12 23:27:23.000000000 +0000 @@ -55,9 +55,7 @@ UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks); - if (i_block < 0) { - ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0"); - } else if (i_block < direct_blocks) { + if (i_block < direct_blocks) { offsets[n++] = i_block; } else if ((i_block -= direct_blocks) < indirect_blocks) { offsets[n++] = UFS_IND_BLOCK; @@ -454,8 +452,6 @@ lock_kernel(); UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); - if (fragment < 0) - goto abort_negative; if (fragment > ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb) << uspi->s_fpbshift)) @@ -516,10 +512,6 @@ unlock_kernel(); return err; -abort_negative: - ufs_warning(sb, "ufs_get_block", "block < 0"); - goto abort; - abort_too_big: ufs_warning(sb, "ufs_get_block", "block > big"); goto abort; diff -Naur linux-2.6.19.2.orig/fs/utimes.c linux-2.6.19.2/fs/utimes.c --- linux-2.6.19.2.orig/fs/utimes.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/utimes.c 2007-01-12 23:27:23.000000000 +0000 @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,12 @@ (error = vfs_permission(&nd, MAY_WRITE)) != 0) goto dput_and_out; } + + if (!gr_acl_handle_utime(nd.dentry, nd.mnt)) { + error = -EACCES; + goto dput_and_out; + } + mutex_lock(&inode->i_mutex); error = notify_change(nd.dentry, &newattrs); mutex_unlock(&inode->i_mutex); @@ -113,6 +120,12 @@ (error = vfs_permission(&nd, MAY_WRITE)) != 0) goto dput_and_out; } + + if (!gr_acl_handle_utime(nd.dentry, nd.mnt)) { + error = -EACCES; + goto dput_and_out; + } + mutex_lock(&inode->i_mutex); error = notify_change(nd.dentry, &newattrs); mutex_unlock(&inode->i_mutex); diff -Naur linux-2.6.19.2.orig/fs/xfs/linux-2.6/xfs_file.c linux-2.6.19.2/fs/xfs/linux-2.6/xfs_file.c --- linux-2.6.19.2.orig/fs/xfs/linux-2.6/xfs_file.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/xfs/linux-2.6/xfs_file.c 2007-01-12 23:27:23.000000000 +0000 @@ -342,6 +342,12 @@ struct file *filp, struct vm_area_struct *vma) { + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) + if ((vma->vm_mm->pax_flags & MF_PAX_PAGEEXEC) && !(vma->vm_flags & VM_EXEC)) + vma->vm_page_prot = __pgprot(pte_val(pte_exprotect(__pte(pgprot_val(vma->vm_page_prot))))); +#endif + vma->vm_ops = &xfs_file_vm_ops; #ifdef CONFIG_XFS_DMAPI diff -Naur linux-2.6.19.2.orig/fs/xfs/xfs_bmap.c linux-2.6.19.2/fs/xfs/xfs_bmap.c --- linux-2.6.19.2.orig/fs/xfs/xfs_bmap.c 2007-01-10 19:10:37.000000000 +0000 +++ linux-2.6.19.2/fs/xfs/xfs_bmap.c 2007-01-12 23:27:23.000000000 +0000 @@ -376,7 +376,7 @@ int nmap, int ret_nmap); #else -#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) +#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do {} while (0) #endif /* DEBUG */ #if defined(XFS_RW_TRACE) diff -Naur linux-2.6.19.2.orig/grsecurity/Kconfig linux-2.6.19.2/grsecurity/Kconfig --- linux-2.6.19.2.orig/grsecurity/Kconfig 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/Kconfig 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,872 @@ +# +# grecurity configuration +# + +menu "Grsecurity" + +config GRKERNSEC + bool "Grsecurity" + select CRYPTO + select CRYPTO_SHA256 + help + If you say Y here, you will be able to configure many features + that will enhance the security of your system. It is highly + recommended that you say Y here and read through the help + for each option so that you fully understand the features and + can evaluate their usefulness for your machine. + +choice + prompt "Security Level" + depends GRKERNSEC + default GRKERNSEC_CUSTOM + +config GRKERNSEC_LOW + bool "Low" + select GRKERNSEC_LINK + select GRKERNSEC_FIFO + select GRKERNSEC_EXECVE + select GRKERNSEC_RANDNET + select GRKERNSEC_DMESG + select GRKERNSEC_CHROOT_CHDIR + select GRKERNSEC_MODSTOP if (MODULES) + + help + If you choose this option, several of the grsecurity options will + be enabled that will give you greater protection against a number + of attacks, while assuring that none of your software will have any + conflicts with the additional security measures. If you run a lot + of unusual software, or you are having problems with the higher + security levels, you should say Y here. With this option, the + following features are enabled: + + - Linking restrictions + - FIFO restrictions + - Enforcing RLIMIT_NPROC on execve + - Restricted dmesg + - Enforced chdir("/") on chroot + - Runtime module disabling + +config GRKERNSEC_MEDIUM + bool "Medium" + select PAX + select PAX_EI_PAX + select PAX_PT_PAX_FLAGS + select PAX_HAVE_ACL_FLAGS + select GRKERNSEC_PROC_MEMMAP if (PAX_NOEXEC || PAX_ASLR) + select GRKERNSEC_CHROOT_SYSCTL + select GRKERNSEC_LINK + select GRKERNSEC_FIFO + select GRKERNSEC_EXECVE + select GRKERNSEC_DMESG + select GRKERNSEC_RANDNET + select GRKERNSEC_FORKFAIL + select GRKERNSEC_TIME + select GRKERNSEC_SIGNAL + select GRKERNSEC_CHROOT + select GRKERNSEC_CHROOT_UNIX + select GRKERNSEC_CHROOT_MOUNT + select GRKERNSEC_CHROOT_PIVOT + select GRKERNSEC_CHROOT_DOUBLE + select GRKERNSEC_CHROOT_CHDIR + select GRKERNSEC_CHROOT_MKNOD + select GRKERNSEC_PROC + select GRKERNSEC_PROC_USERGROUP + select GRKERNSEC_MODSTOP if (MODULES) + select PAX_RANDUSTACK + select PAX_ASLR + select PAX_RANDMMAP + + help + If you say Y here, several features in addition to those included + in the low additional security level will be enabled. These + features provide even more security to your system, though in rare + cases they may be incompatible with very old or poorly written + software. If you enable this option, make sure that your auth + service (identd) is running as gid 1001. With this option, + the following features (in addition to those provided in the + low additional security level) will be enabled: + + - Randomized TCP source ports + - Failed fork logging + - Time change logging + - Signal logging + - Deny mounts in chroot + - Deny double chrooting + - Deny sysctl writes in chroot + - Deny mknod in chroot + - Deny access to abstract AF_UNIX sockets out of chroot + - Deny pivot_root in chroot + - Denied writes of /dev/kmem, /dev/mem, and /dev/port + - /proc restrictions with special GID set to 10 (usually wheel) + - Address Space Layout Randomization (ASLR) + +config GRKERNSEC_HIGH + bool "High" + select GRKERNSEC_LINK + select GRKERNSEC_FIFO + select GRKERNSEC_EXECVE + select GRKERNSEC_DMESG + select GRKERNSEC_FORKFAIL + select GRKERNSEC_TIME + select GRKERNSEC_SIGNAL + select GRKERNSEC_CHROOT_SHMAT + select GRKERNSEC_CHROOT_UNIX + select GRKERNSEC_CHROOT_MOUNT + select GRKERNSEC_CHROOT_FCHDIR + select GRKERNSEC_CHROOT_PIVOT + select GRKERNSEC_CHROOT_DOUBLE + select GRKERNSEC_CHROOT_CHDIR + select GRKERNSEC_CHROOT_MKNOD + select GRKERNSEC_CHROOT_CAPS + select GRKERNSEC_CHROOT_SYSCTL + select GRKERNSEC_CHROOT_FINDTASK + select GRKERNSEC_PROC + select GRKERNSEC_PROC_MEMMAP if (PAX_NOEXEC || PAX_ASLR) + select GRKERNSEC_HIDESYM + select GRKERNSEC_BRUTE + select GRKERNSEC_SHM if (SYSVIPC) + select GRKERNSEC_PROC_USERGROUP + select GRKERNSEC_KMEM + select GRKERNSEC_RESLOG + select GRKERNSEC_RANDNET + select GRKERNSEC_PROC_ADD + select GRKERNSEC_CHROOT_CHMOD + select GRKERNSEC_CHROOT_NICE + select GRKERNSEC_AUDIT_MOUNT + select GRKERNSEC_MODSTOP if (MODULES) + select PAX + select PAX_RANDUSTACK + select PAX_ASLR + select PAX_RANDMMAP + select PAX_NOEXEC + select PAX_MPROTECT + select PAX_EI_PAX + select PAX_PT_PAX_FLAGS + select PAX_HAVE_ACL_FLAGS + select PAX_KERNEXEC if (!X86_64 && !MODULES && !HOTPLUG_PCI_COMPAQ_NVRAM && !PCI_BIOS) + select PAX_RANDKSTACK if (X86_TSC && !X86_64) + select PAX_SEGMEXEC if (X86 && !X86_64) + select PAX_PAGEEXEC if (!X86) + select PAX_EMUPLT if (ALPHA || PARISC || PPC32 || SPARC32 || SPARC64) + select PAX_DLRESOLVE if (SPARC32 || SPARC64) + select PAX_SYSCALL if (PPC32) + select PAX_EMUTRAMP if (PARISC) + select PAX_EMUSIGRT if (PARISC) + select PAX_ETEXECRELOCS if (ALPHA || IA64 || PARISC) + help + If you say Y here, many of the features of grsecurity will be + enabled, which will protect you against many kinds of attacks + against your system. The heightened security comes at a cost + of an increased chance of incompatibilities with rare software + on your machine. Since this security level enables PaX, you should + view and read about the PaX + project. While you are there, download chpax and run it on + binaries that cause problems with PaX. Also remember that + since the /proc restrictions are enabled, you must run your + identd as gid 1001. This security level enables the following + features in addition to those listed in the low and medium + security levels: + + - Additional /proc restrictions + - Chmod restrictions in chroot + - No signals, ptrace, or viewing of processes outside of chroot + - Capability restrictions in chroot + - Deny fchdir out of chroot + - Priority restrictions in chroot + - Segmentation-based implementation of PaX + - Mprotect restrictions + - Removal of addresses from /proc//[smaps|maps|stat] + - Kernel stack randomization + - Mount/unmount/remount logging + - Kernel symbol hiding + - Destroy unused shared memory + - Prevention of memory exhaustion-based exploits +config GRKERNSEC_CUSTOM + bool "Custom" + help + If you say Y here, you will be able to configure every grsecurity + option, which allows you to enable many more features that aren't + covered in the basic security levels. These additional features + include TPE, socket restrictions, and the sysctl system for + grsecurity. It is advised that you read through the help for + each option to determine its usefulness in your situation. + +endchoice + +menu "Address Space Protection" +depends on GRKERNSEC + +config GRKERNSEC_KMEM + bool "Deny writing to /dev/kmem, /dev/mem, and /dev/port" + help + If you say Y here, /dev/kmem and /dev/mem won't be allowed to + be written to via mmap or otherwise to modify the running kernel. + /dev/port will also not be allowed to be opened. If you have module + support disabled, enabling this will close up four ways that are + currently used to insert malicious code into the running kernel. + Even with all these features enabled, we still highly recommend that + you use the RBAC system, as it is still possible for an attacker to + modify the running kernel through privileged I/O granted by ioperm/iopl. + If you are not using XFree86, you may be able to stop this additional + case by enabling the 'Disable privileged I/O' option. Though nothing + legitimately writes to /dev/kmem, XFree86 does need to write to /dev/mem, + but only to video memory, which is the only writing we allow in this + case. If /dev/kmem or /dev/mem are mmaped without PROT_WRITE, they will + not be allowed to mprotect it with PROT_WRITE later. + It is highly recommended that you say Y here if you meet all the + conditions above. + +config GRKERNSEC_IO + bool "Disable privileged I/O" + depends on X86 + select RTC + help + If you say Y here, all ioperm and iopl calls will return an error. + Ioperm and iopl can be used to modify the running kernel. + Unfortunately, some programs need this access to operate properly, + the most notable of which are XFree86 and hwclock. hwclock can be + remedied by having RTC support in the kernel, so CONFIG_RTC is + enabled if this option is enabled, to ensure that hwclock operates + correctly. XFree86 still will not operate correctly with this option + enabled, so DO NOT CHOOSE Y IF YOU USE XFree86. If you use XFree86 + and you still want to protect your kernel against modification, + use the RBAC system. + +config GRKERNSEC_PROC_MEMMAP + bool "Remove addresses from /proc//[smaps|maps|stat]" + depends on PAX_NOEXEC || PAX_ASLR + help + If you say Y here, the /proc//maps and /proc//stat files will + give no information about the addresses of its mappings if + PaX features that rely on random addresses are enabled on the task. + If you use PaX it is greatly recommended that you say Y here as it + closes up a hole that makes the full ASLR useless for suid + binaries. + +config GRKERNSEC_BRUTE + bool "Deter exploit bruteforcing" + help + If you say Y here, attempts to bruteforce exploits against forking + daemons such as apache or sshd will be deterred. When a child of a + forking daemon is killed by PaX or crashes due to an illegal + instruction, the parent process will be delayed 30 seconds upon every + subsequent fork until the administrator is able to assess the + situation and restart the daemon. It is recommended that you also + enable signal logging in the auditing section so that logs are + generated when a process performs an illegal instruction. + +config GRKERNSEC_MODSTOP + bool "Runtime module disabling" + depends on MODULES + help + If you say Y here, you will be able to disable the ability to (un)load + modules at runtime. This feature is useful if you need the ability + to load kernel modules at boot time, but do not want to allow an + attacker to load a rootkit kernel module into the system, or to remove + a loaded kernel module important to system functioning. You should + enable the /dev/mem protection feature as well, since rootkits can be + inserted into the kernel via other methods than kernel modules. Since + an untrusted module could still be loaded by modifying init scripts and + rebooting the system, it is also recommended that you enable the RBAC + system. If you enable this option, a sysctl option with name + "disable_modules" will be created. Setting this option to "1" disables + module loading. After this option is set, no further writes to it are + allowed until the system is rebooted. + +config GRKERNSEC_HIDESYM + bool "Hide kernel symbols" + help + If you say Y here, getting information on loaded modules, and + displaying all kernel symbols through a syscall will be restricted + to users with CAP_SYS_MODULE. This option is only effective + provided the following conditions are met: + 1) The kernel using grsecurity is not precompiled by some distribution + 2) You are using the RBAC system and hiding other files such as your + kernel image and System.map + 3) You have the additional /proc restrictions enabled, which removes + /proc/kcore + If the above conditions are met, this option will aid to provide a + useful protection against local and remote kernel exploitation of + overflows and arbitrary read/write vulnerabilities. + +endmenu +menu "Role Based Access Control Options" +depends on GRKERNSEC + +config GRKERNSEC_ACL_HIDEKERN + bool "Hide kernel processes" + help + If you say Y here, all kernel threads will be hidden to all + processes but those whose subject has the "view hidden processes" + flag. + +config GRKERNSEC_ACL_MAXTRIES + int "Maximum tries before password lockout" + default 3 + help + This option enforces the maximum number of times a user can attempt + to authorize themselves with the grsecurity RBAC system before being + denied the ability to attempt authorization again for a specified time. + The lower the number, the harder it will be to brute-force a password. + +config GRKERNSEC_ACL_TIMEOUT + int "Time to wait after max password tries, in seconds" + default 30 + help + This option specifies the time the user must wait after attempting to + authorize to the RBAC system with the maximum number of invalid + passwords. The higher the number, the harder it will be to brute-force + a password. + +endmenu +menu "Filesystem Protections" +depends on GRKERNSEC + +config GRKERNSEC_PROC + bool "Proc restrictions" + help + If you say Y here, the permissions of the /proc filesystem + will be altered to enhance system security and privacy. You MUST + choose either a user only restriction or a user and group restriction. + Depending upon the option you choose, you can either restrict users to + see only the processes they themselves run, or choose a group that can + view all processes and files normally restricted to root if you choose + the "restrict to user only" option. NOTE: If you're running identd as + a non-root user, you will have to run it as the group you specify here. + +config GRKERNSEC_PROC_USER + bool "Restrict /proc to user only" + depends on GRKERNSEC_PROC + help + If you say Y here, non-root users will only be able to view their own + processes, and restricts them from viewing network-related information, + and viewing kernel symbol and module information. + +config GRKERNSEC_PROC_USERGROUP + bool "Allow special group" + depends on GRKERNSEC_PROC && !GRKERNSEC_PROC_USER + help + If you say Y here, you will be able to select a group that will be + able to view all processes, network-related information, and + kernel and symbol information. This option is useful if you want + to run identd as a non-root user. + +config GRKERNSEC_PROC_GID + int "GID for special group" + depends on GRKERNSEC_PROC_USERGROUP + default 1001 + +config GRKERNSEC_PROC_ADD + bool "Additional restrictions" + depends on GRKERNSEC_PROC_USER || GRKERNSEC_PROC_USERGROUP + help + If you say Y here, additional restrictions will be placed on + /proc that keep normal users from viewing device information and + slabinfo information that could be useful for exploits. + +config GRKERNSEC_LINK + bool "Linking restrictions" + help + If you say Y here, /tmp race exploits will be prevented, since users + will no longer be able to follow symlinks owned by other users in + world-writable +t directories (i.e. /tmp), unless the owner of the + symlink is the owner of the directory. users will also not be + able to hardlink to files they do not own. If the sysctl option is + enabled, a sysctl option with name "linking_restrictions" is created. + +config GRKERNSEC_FIFO + bool "FIFO restrictions" + help + If you say Y here, users will not be able to write to FIFOs they don't + own in world-writable +t directories (i.e. /tmp), unless the owner of + the FIFO is the same owner of the directory it's held in. If the sysctl + option is enabled, a sysctl option with name "fifo_restrictions" is + created. + +config GRKERNSEC_CHROOT + bool "Chroot jail restrictions" + help + If you say Y here, you will be able to choose several options that will + make breaking out of a chrooted jail much more difficult. If you + encounter no software incompatibilities with the following options, it + is recommended that you enable each one. + +config GRKERNSEC_CHROOT_MOUNT + bool "Deny mounts" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to + mount or remount filesystems. If the sysctl option is enabled, a + sysctl option with name "chroot_deny_mount" is created. + +config GRKERNSEC_CHROOT_DOUBLE + bool "Deny double-chroots" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to chroot + again outside the chroot. This is a widely used method of breaking + out of a chroot jail and should not be allowed. If the sysctl + option is enabled, a sysctl option with name + "chroot_deny_chroot" is created. + +config GRKERNSEC_CHROOT_PIVOT + bool "Deny pivot_root in chroot" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to use + a function called pivot_root() that was introduced in Linux 2.3.41. It + works similar to chroot in that it changes the root filesystem. This + function could be misused in a chrooted process to attempt to break out + of the chroot, and therefore should not be allowed. If the sysctl + option is enabled, a sysctl option with name "chroot_deny_pivot" is + created. + +config GRKERNSEC_CHROOT_CHDIR + bool "Enforce chdir(\"/\") on all chroots" + depends on GRKERNSEC_CHROOT + help + If you say Y here, the current working directory of all newly-chrooted + applications will be set to the the root directory of the chroot. + The man page on chroot(2) states: + Note that this call does not change the current working + directory, so that `.' can be outside the tree rooted at + `/'. In particular, the super-user can escape from a + `chroot jail' by doing `mkdir foo; chroot foo; cd ..'. + + It is recommended that you say Y here, since it's not known to break + any software. If the sysctl option is enabled, a sysctl option with + name "chroot_enforce_chdir" is created. + +config GRKERNSEC_CHROOT_CHMOD + bool "Deny (f)chmod +s" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to chmod + or fchmod files to make them have suid or sgid bits. This protects + against another published method of breaking a chroot. If the sysctl + option is enabled, a sysctl option with name "chroot_deny_chmod" is + created. + +config GRKERNSEC_CHROOT_FCHDIR + bool "Deny fchdir out of chroot" + depends on GRKERNSEC_CHROOT + help + If you say Y here, a well-known method of breaking chroots by fchdir'ing + to a file descriptor of the chrooting process that points to a directory + outside the filesystem will be stopped. If the sysctl option + is enabled, a sysctl option with name "chroot_deny_fchdir" is created. + +config GRKERNSEC_CHROOT_MKNOD + bool "Deny mknod" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be allowed to + mknod. The problem with using mknod inside a chroot is that it + would allow an attacker to create a device entry that is the same + as one on the physical root of your system, which could range from + anything from the console device to a device for your harddrive (which + they could then use to wipe the drive or steal data). It is recommended + that you say Y here, unless you run into software incompatibilities. + If the sysctl option is enabled, a sysctl option with name + "chroot_deny_mknod" is created. + +config GRKERNSEC_CHROOT_SHMAT + bool "Deny shmat() out of chroot" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to attach + to shared memory segments that were created outside of the chroot jail. + It is recommended that you say Y here. If the sysctl option is enabled, + a sysctl option with name "chroot_deny_shmat" is created. + +config GRKERNSEC_CHROOT_UNIX + bool "Deny access to abstract AF_UNIX sockets out of chroot" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to + connect to abstract (meaning not belonging to a filesystem) Unix + domain sockets that were bound outside of a chroot. It is recommended + that you say Y here. If the sysctl option is enabled, a sysctl option + with name "chroot_deny_unix" is created. + +config GRKERNSEC_CHROOT_FINDTASK + bool "Protect outside processes" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to + kill, send signals with fcntl, ptrace, capget, getpgid, getsid, + or view any process outside of the chroot. If the sysctl + option is enabled, a sysctl option with name "chroot_findtask" is + created. + +config GRKERNSEC_CHROOT_NICE + bool "Restrict priority changes" + depends on GRKERNSEC_CHROOT + help + If you say Y here, processes inside a chroot will not be able to raise + the priority of processes in the chroot, or alter the priority of + processes outside the chroot. This provides more security than simply + removing CAP_SYS_NICE from the process' capability set. If the + sysctl option is enabled, a sysctl option with name "chroot_restrict_nice" + is created. + +config GRKERNSEC_CHROOT_SYSCTL + bool "Deny sysctl writes" + depends on GRKERNSEC_CHROOT + help + If you say Y here, an attacker in a chroot will not be able to + write to sysctl entries, either by sysctl(2) or through a /proc + interface. It is strongly recommended that you say Y here. If the + sysctl option is enabled, a sysctl option with name + "chroot_deny_sysctl" is created. + +config GRKERNSEC_CHROOT_CAPS + bool "Capability restrictions" + depends on GRKERNSEC_CHROOT + help + If you say Y here, the capabilities on all root processes within a + chroot jail will be lowered to stop module insertion, raw i/o, + system and net admin tasks, rebooting the system, modifying immutable + files, modifying IPC owned by another, and changing the system time. + This is left an option because it can break some apps. Disable this + if your chrooted apps are having problems performing those kinds of + tasks. If the sysctl option is enabled, a sysctl option with + name "chroot_caps" is created. + +endmenu +menu "Kernel Auditing" +depends on GRKERNSEC + +config GRKERNSEC_AUDIT_GROUP + bool "Single group for auditing" + help + If you say Y here, the exec, chdir, (un)mount, and ipc logging features + will only operate on a group you specify. This option is recommended + if you only want to watch certain users instead of having a large + amount of logs from the entire system. If the sysctl option is enabled, + a sysctl option with name "audit_group" is created. + +config GRKERNSEC_AUDIT_GID + int "GID for auditing" + depends on GRKERNSEC_AUDIT_GROUP + default 1007 + +config GRKERNSEC_EXECLOG + bool "Exec logging" + help + If you say Y here, all execve() calls will be logged (since the + other exec*() calls are frontends to execve(), all execution + will be logged). Useful for shell-servers that like to keep track + of their users. If the sysctl option is enabled, a sysctl option with + name "exec_logging" is created. + WARNING: This option when enabled will produce a LOT of logs, especially + on an active system. + +config GRKERNSEC_RESLOG + bool "Resource logging" + help + If you say Y here, all attempts to overstep resource limits will + be logged with the resource name, the requested size, and the current + limit. It is highly recommended that you say Y here. If the sysctl + option is enabled, a sysctl option with name "resource_logging" is + created. If the RBAC system is enabled, the sysctl value is ignored. + +config GRKERNSEC_CHROOT_EXECLOG + bool "Log execs within chroot" + help + If you say Y here, all executions inside a chroot jail will be logged + to syslog. This can cause a large amount of logs if certain + applications (eg. djb's daemontools) are installed on the system, and + is therefore left as an option. If the sysctl option is enabled, a + sysctl option with name "chroot_execlog" is created. + +config GRKERNSEC_AUDIT_CHDIR + bool "Chdir logging" + help + If you say Y here, all chdir() calls will be logged. If the sysctl + option is enabled, a sysctl option with name "audit_chdir" is created. + +config GRKERNSEC_AUDIT_MOUNT + bool "(Un)Mount logging" + help + If you say Y here, all mounts and unmounts will be logged. If the + sysctl option is enabled, a sysctl option with name "audit_mount" is + created. + +config GRKERNSEC_AUDIT_IPC + bool "IPC logging" + help + If you say Y here, creation and removal of message queues, semaphores, + and shared memory will be logged. If the sysctl option is enabled, a + sysctl option with name "audit_ipc" is created. + +config GRKERNSEC_SIGNAL + bool "Signal logging" + help + If you say Y here, certain important signals will be logged, such as + SIGSEGV, which will as a result inform you of when a error in a program + occurred, which in some cases could mean a possible exploit attempt. + If the sysctl option is enabled, a sysctl option with name + "signal_logging" is created. + +config GRKERNSEC_FORKFAIL + bool "Fork failure logging" + help + If you say Y here, all failed fork() attempts will be logged. + This could suggest a fork bomb, or someone attempting to overstep + their process limit. If the sysctl option is enabled, a sysctl option + with name "forkfail_logging" is created. + +config GRKERNSEC_TIME + bool "Time change logging" + help + If you say Y here, any changes of the system clock will be logged. + If the sysctl option is enabled, a sysctl option with name + "timechange_logging" is created. + +config GRKERNSEC_PROC_IPADDR + bool "/proc//ipaddr support" + help + If you say Y here, a new entry will be added to each /proc/ + directory that contains the IP address of the person using the task. + The IP is carried across local TCP and AF_UNIX stream sockets. + This information can be useful for IDS/IPSes to perform remote response + to a local attack. The entry is readable by only the owner of the + process (and root if he has CAP_DAC_OVERRIDE, which can be removed via + the RBAC system), and thus does not create privacy concerns. + +config GRKERNSEC_AUDIT_TEXTREL + bool 'ELF text relocations logging (READ HELP)' + depends on PAX_MPROTECT + help + If you say Y here, text relocations will be logged with the filename + of the offending library or binary. The purpose of the feature is + to help Linux distribution developers get rid of libraries and + binaries that need text relocations which hinder the future progress + of PaX. Only Linux distribution developers should say Y here, and + never on a production machine, as this option creates an information + leak that could aid an attacker in defeating the randomization of + a single memory region. If the sysctl option is enabled, a sysctl + option with name "audit_textrel" is created. + +endmenu + +menu "Executable Protections" +depends on GRKERNSEC + +config GRKERNSEC_EXECVE + bool "Enforce RLIMIT_NPROC on execs" + help + If you say Y here, users with a resource limit on processes will + have the value checked during execve() calls. The current system + only checks the system limit during fork() calls. If the sysctl option + is enabled, a sysctl option with name "execve_limiting" is created. + +config GRKERNSEC_SHM + bool "Destroy unused shared memory" + depends on SYSVIPC + help + If you say Y here, shared memory will be destroyed when no one is + attached to it. Otherwise, resources involved with the shared + memory can be used up and not be associated with any process (as the + shared memory still exists, and the creating process has exited). If + the sysctl option is enabled, a sysctl option with name + "destroy_unused_shm" is created. + +config GRKERNSEC_DMESG + bool "Dmesg(8) restriction" + help + If you say Y here, non-root users will not be able to use dmesg(8) + to view up to the last 4kb of messages in the kernel's log buffer. + If the sysctl option is enabled, a sysctl option with name "dmesg" is + created. + +config GRKERNSEC_TPE + bool "Trusted Path Execution (TPE)" + help + If you say Y here, you will be able to choose a gid to add to the + supplementary groups of users you want to mark as "untrusted." + These users will not be able to execute any files that are not in + root-owned directories writable only by root. If the sysctl option + is enabled, a sysctl option with name "tpe" is created. + +config GRKERNSEC_TPE_ALL + bool "Partially restrict non-root users" + depends on GRKERNSEC_TPE + help + If you say Y here, All non-root users other than the ones in the + group specified in the main TPE option will only be allowed to + execute files in directories they own that are not group or + world-writable, or in directories owned by root and writable only by + root. If the sysctl option is enabled, a sysctl option with name + "tpe_restrict_all" is created. + +config GRKERNSEC_TPE_INVERT + bool "Invert GID option" + depends on GRKERNSEC_TPE + help + If you say Y here, the group you specify in the TPE configuration will + decide what group TPE restrictions will be *disabled* for. This + option is useful if you want TPE restrictions to be applied to most + users on the system. + +config GRKERNSEC_TPE_GID + int "GID for untrusted users" + depends on GRKERNSEC_TPE && !GRKERNSEC_TPE_INVERT + default 1005 + help + If you have selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *disabled* for. + If you have not selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *enabled* for. + If the sysctl option is enabled, a sysctl option with name "tpe_gid" + is created. + +config GRKERNSEC_TPE_GID + int "GID for trusted users" + depends on GRKERNSEC_TPE && GRKERNSEC_TPE_INVERT + default 1005 + help + If you have selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *disabled* for. + If you have not selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *enabled* for. + If the sysctl option is enabled, a sysctl option with name "tpe_gid" + is created. + +endmenu +menu "Network Protections" +depends on GRKERNSEC + +config GRKERNSEC_RANDNET + bool "Larger entropy pools" + help + If you say Y here, the entropy pools used for many features of Linux + and grsecurity will be doubled in size. Since several grsecurity + features use additional randomness, it is recommended that you say Y + here. Saying Y here has a similar effect as modifying + /proc/sys/kernel/random/poolsize. + +config GRKERNSEC_SOCKET + bool "Socket restrictions" + help + If you say Y here, you will be able to choose from several options. + If you assign a GID on your system and add it to the supplementary + groups of users you want to restrict socket access to, this patch + will perform up to three things, based on the option(s) you choose. + +config GRKERNSEC_SOCKET_ALL + bool "Deny any sockets to group" + depends on GRKERNSEC_SOCKET + help + If you say Y here, you will be able to choose a GID of whose users will + be unable to connect to other hosts from your machine or run server + applications from your machine. If the sysctl option is enabled, a + sysctl option with name "socket_all" is created. + +config GRKERNSEC_SOCKET_ALL_GID + int "GID to deny all sockets for" + depends on GRKERNSEC_SOCKET_ALL + default 1004 + help + Here you can choose the GID to disable socket access for. Remember to + add the users you want socket access disabled for to the GID + specified here. If the sysctl option is enabled, a sysctl option + with name "socket_all_gid" is created. + +config GRKERNSEC_SOCKET_CLIENT + bool "Deny client sockets to group" + depends on GRKERNSEC_SOCKET + help + If you say Y here, you will be able to choose a GID of whose users will + be unable to connect to other hosts from your machine, but will be + able to run servers. If this option is enabled, all users in the group + you specify will have to use passive mode when initiating ftp transfers + from the shell on your machine. If the sysctl option is enabled, a + sysctl option with name "socket_client" is created. + +config GRKERNSEC_SOCKET_CLIENT_GID + int "GID to deny client sockets for" + depends on GRKERNSEC_SOCKET_CLIENT + default 1003 + help + Here you can choose the GID to disable client socket access for. + Remember to add the users you want client socket access disabled for to + the GID specified here. If the sysctl option is enabled, a sysctl + option with name "socket_client_gid" is created. + +config GRKERNSEC_SOCKET_SERVER + bool "Deny server sockets to group" + depends on GRKERNSEC_SOCKET + help + If you say Y here, you will be able to choose a GID of whose users will + be unable to run server applications from your machine. If the sysctl + option is enabled, a sysctl option with name "socket_server" is created. + +config GRKERNSEC_SOCKET_SERVER_GID + int "GID to deny server sockets for" + depends on GRKERNSEC_SOCKET_SERVER + default 1002 + help + Here you can choose the GID to disable server socket access for. + Remember to add the users you want server socket access disabled for to + the GID specified here. If the sysctl option is enabled, a sysctl + option with name "socket_server_gid" is created. + +endmenu +menu "Sysctl support" +depends on GRKERNSEC && SYSCTL + +config GRKERNSEC_SYSCTL + bool "Sysctl support" + help + If you say Y here, you will be able to change the options that + grsecurity runs with at bootup, without having to recompile your + kernel. You can echo values to files in /proc/sys/kernel/grsecurity + to enable (1) or disable (0) various features. All the sysctl entries + are mutable until the "grsec_lock" entry is set to a non-zero value. + All features enabled in the kernel configuration are disabled at boot + if you do not say Y to the "Turn on features by default" option. + All options should be set at startup, and the grsec_lock entry should + be set to a non-zero value after all the options are set. + *THIS IS EXTREMELY IMPORTANT* + +config GRKERNSEC_SYSCTL_ON + bool "Turn on features by default" + depends on GRKERNSEC_SYSCTL + help + If you say Y here, instead of having all features enabled in the + kernel configuration disabled at boot time, the features will be + enabled at boot time. It is recommended you say Y here unless + there is some reason you would want all sysctl-tunable features to + be disabled by default. As mentioned elsewhere, it is important + to enable the grsec_lock entry once you have finished modifying + the sysctl entries. + +endmenu +menu "Logging Options" +depends on GRKERNSEC + +config GRKERNSEC_FLOODTIME + int "Seconds in between log messages (minimum)" + default 10 + help + This option allows you to enforce the number of seconds between + grsecurity log messages. The default should be suitable for most + people, however, if you choose to change it, choose a value small enough + to allow informative logs to be produced, but large enough to + prevent flooding. + +config GRKERNSEC_FLOODBURST + int "Number of messages in a burst (maximum)" + default 4 + help + This option allows you to choose the maximum number of messages allowed + within the flood time interval you chose in a separate option. The + default should be suitable for most people, however if you find that + many of your logs are being interpreted as flooding, you may want to + raise this value. + +endmenu + +endmenu diff -Naur linux-2.6.19.2.orig/grsecurity/Makefile linux-2.6.19.2/grsecurity/Makefile --- linux-2.6.19.2.orig/grsecurity/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/Makefile 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,20 @@ +# grsecurity's ACL system was originally written in 2001 by Michael Dalton +# during 2001-2005 it has been completely redesigned by Brad Spengler +# into an RBAC system +# +# All code in this directory and various hooks inserted throughout the kernel +# are copyright Brad Spengler, and released under the GPL v2 or higher + +obj-y = grsec_chdir.o grsec_chroot.o grsec_exec.o grsec_fifo.o grsec_fork.o \ + grsec_mount.o grsec_sig.o grsec_sock.o grsec_sysctl.o \ + grsec_time.o grsec_tpe.o grsec_ipc.o grsec_link.o grsec_textrel.o + +obj-$(CONFIG_GRKERNSEC) += grsec_init.o grsum.o gracl.o gracl_ip.o gracl_segv.o \ + gracl_cap.o gracl_alloc.o gracl_shm.o grsec_mem.o gracl_fs.o \ + gracl_learn.o grsec_log.o +obj-$(CONFIG_GRKERNSEC_RESLOG) += gracl_res.o + +ifndef CONFIG_GRKERNSEC +obj-y += grsec_disabled.o +endif + diff -Naur linux-2.6.19.2.orig/grsecurity/gracl.c linux-2.6.19.2/grsecurity/gracl.c --- linux-2.6.19.2.orig/grsecurity/gracl.c 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/gracl.c 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,3547 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static struct acl_role_db acl_role_set; +static struct name_db name_set; +static struct inodev_db inodev_set; + +/* for keeping track of userspace pointers used for subjects, so we + can share references in the kernel as well +*/ + +static struct dentry *real_root; +static struct vfsmount *real_root_mnt; + +static struct acl_subj_map_db subj_map_set; + +static struct acl_role_label *default_role; + +static u16 acl_sp_role_value; + +extern char *gr_shared_page[4]; +static DECLARE_MUTEX(gr_dev_sem); +rwlock_t gr_inode_lock = RW_LOCK_UNLOCKED; + +struct gr_arg *gr_usermode; + +static unsigned int gr_status = GR_STATUS_INIT; + +extern int chkpw(struct gr_arg *entry, unsigned char *salt, unsigned char *sum); +extern void gr_clear_learn_entries(void); + +#ifdef CONFIG_GRKERNSEC_RESLOG +extern void gr_log_resource(const struct task_struct *task, + const int res, const unsigned long wanted, const int gt); +#endif + +extern char * __d_path(struct dentry *dentry, struct vfsmount *vfsmnt, + struct dentry *root, struct vfsmount *rootmnt, + char *buffer, int buflen); + +unsigned char *gr_system_salt; +unsigned char *gr_system_sum; + +static struct sprole_pw **acl_special_roles = NULL; +static __u16 num_sprole_pws = 0; + +static struct acl_role_label *kernel_role = NULL; + +static unsigned int gr_auth_attempts = 0; +static unsigned long gr_auth_expires = 0UL; + +extern struct vfsmount *sock_mnt; +extern struct vfsmount *pipe_mnt; +extern struct vfsmount *shm_mnt; +static struct acl_object_label *fakefs_obj; + +extern int gr_init_uidset(void); +extern void gr_free_uidset(void); +extern void gr_remove_uid(uid_t uid); +extern int gr_find_uid(uid_t uid); + +__inline__ int +gr_acl_is_enabled(void) +{ + return (gr_status & GR_READY); +} + +char gr_roletype_to_char(void) +{ + switch (current->role->roletype & + (GR_ROLE_DEFAULT | GR_ROLE_USER | GR_ROLE_GROUP | + GR_ROLE_SPECIAL)) { + case GR_ROLE_DEFAULT: + return 'D'; + case GR_ROLE_USER: + return 'U'; + case GR_ROLE_GROUP: + return 'G'; + case GR_ROLE_SPECIAL: + return 'S'; + } + + return 'X'; +} + +__inline__ int +gr_acl_tpe_check(void) +{ + if (unlikely(!(gr_status & GR_READY))) + return 0; + if (current->role->roletype & GR_ROLE_TPE) + return 1; + else + return 0; +} + +int +gr_handle_rawio(const struct inode *inode) +{ +#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS + if (inode && S_ISBLK(inode->i_mode) && + grsec_enable_chroot_caps && proc_is_chrooted(current) && + !capable(CAP_SYS_RAWIO)) + return 1; +#endif + return 0; +} + +static int +gr_streq(const char *a, const char *b, const unsigned int lena, const unsigned int lenb) +{ + int i; + unsigned long *l1; + unsigned long *l2; + unsigned char *c1; + unsigned char *c2; + int num_longs; + + if (likely(lena != lenb)) + return 0; + + l1 = (unsigned long *)a; + l2 = (unsigned long *)b; + + num_longs = lena / sizeof(unsigned long); + + for (i = num_longs; i--; l1++, l2++) { + if (unlikely(*l1 != *l2)) + return 0; + } + + c1 = (unsigned char *) l1; + c2 = (unsigned char *) l2; + + i = lena - (num_longs * sizeof(unsigned long)); + + for (; i--; c1++, c2++) { + if (unlikely(*c1 != *c2)) + return 0; + } + + return 1; +} + +static char * +gen_full_path(struct dentry *dentry, struct vfsmount *vfsmnt, + struct dentry *root, struct vfsmount *rootmnt, char *buf, int buflen) +{ + char *end = buf + buflen; + char *retval; + int namelen = 0; + + *--end = '\0'; + + retval = end - 1; + *retval = '/'; + + if (dentry == root && vfsmnt == rootmnt) + return retval; + if (dentry != vfsmnt->mnt_root && !IS_ROOT(dentry)) { + namelen = strlen(dentry->d_name.name); + buflen -= namelen; + if (buflen < 2) + goto err; + if (dentry->d_parent != root || vfsmnt != rootmnt) + buflen--; + } + + retval = __d_path(dentry->d_parent, vfsmnt, root, rootmnt, buf, buflen); + if (unlikely(IS_ERR(retval))) +err: + retval = strcpy(buf, ""); + else if (namelen != 0) { + end = buf + buflen - 1; // accounts for null termination + if (dentry->d_parent != root || vfsmnt != rootmnt) + *end++ = '/'; // accounted for above with buflen-- + memcpy(end, dentry->d_name.name, namelen); + } + + return retval; +} + +static char * +__d_real_path(const struct dentry *dentry, const struct vfsmount *vfsmnt, + char *buf, int buflen) +{ + char *res; + + /* we can use real_root, real_root_mnt, because this is only called + by the RBAC system */ + res = gen_full_path((struct dentry *)dentry, (struct vfsmount *)vfsmnt, real_root, real_root_mnt, buf, buflen); + + return res; +} + +static char * +d_real_path(const struct dentry *dentry, const struct vfsmount *vfsmnt, + char *buf, int buflen) +{ + char *res; + struct dentry *root; + struct vfsmount *rootmnt; + + /* we can't use real_root, real_root_mnt, because they belong only to the RBAC system */ + read_lock(&child_reaper->fs->lock); + root = dget(child_reaper->fs->root); + rootmnt = mntget(child_reaper->fs->rootmnt); + read_unlock(&child_reaper->fs->lock); + + spin_lock(&dcache_lock); + res = gen_full_path((struct dentry *)dentry, (struct vfsmount *)vfsmnt, root, rootmnt, buf, buflen); + spin_unlock(&dcache_lock); + + dput(root); + mntput(rootmnt); + return res; +} + +static char * +gr_to_filename_rbac(const struct dentry *dentry, const struct vfsmount *mnt) +{ + char *ret; + spin_lock(&dcache_lock); + ret = __d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0],smp_processor_id()), + PAGE_SIZE); + spin_unlock(&dcache_lock); + return ret; +} + +char * +gr_to_filename_nolock(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return __d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0],smp_processor_id()), + PAGE_SIZE); +} + +char * +gr_to_filename(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0], smp_processor_id()), + PAGE_SIZE); +} + +char * +gr_to_filename1(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[1], smp_processor_id()), + PAGE_SIZE); +} + +char * +gr_to_filename2(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[2], smp_processor_id()), + PAGE_SIZE); +} + +char * +gr_to_filename3(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[3], smp_processor_id()), + PAGE_SIZE); +} + +__inline__ __u32 +to_gr_audit(const __u32 reqmode) +{ + /* masks off auditable permission flags, then shifts them to create + auditing flags, and adds the special case of append auditing if + we're requesting write */ + return (((reqmode & GR_AUDIT_READ) << 10) | ((reqmode & GR_WRITE) ? GR_AUDIT_APPEND : 0)); +} + +struct acl_subject_label * +lookup_subject_map(const struct acl_subject_label *userp) +{ + unsigned int index = shash(userp, subj_map_set.s_size); + struct subject_map *match; + + match = subj_map_set.s_hash[index]; + + while (match && match->user != userp) + match = match->next; + + if (match != NULL) + return match->kernel; + else + return NULL; +} + +static void +insert_subj_map_entry(struct subject_map *subjmap) +{ + unsigned int index = shash(subjmap->user, subj_map_set.s_size); + struct subject_map **curr; + + subjmap->prev = NULL; + + curr = &subj_map_set.s_hash[index]; + if (*curr != NULL) + (*curr)->prev = subjmap; + + subjmap->next = *curr; + *curr = subjmap; + + return; +} + +static struct acl_role_label * +lookup_acl_role_label(const struct task_struct *task, const uid_t uid, + const gid_t gid) +{ + unsigned int index = rhash(uid, GR_ROLE_USER, acl_role_set.r_size); + struct acl_role_label *match; + struct role_allowed_ip *ipp; + unsigned int x; + + match = acl_role_set.r_hash[index]; + + while (match) { + if ((match->roletype & (GR_ROLE_DOMAIN | GR_ROLE_USER)) == (GR_ROLE_DOMAIN | GR_ROLE_USER)) { + for (x = 0; x < match->domain_child_num; x++) { + if (match->domain_children[x] == uid) + goto found; + } + } else if (match->uidgid == uid && match->roletype & GR_ROLE_USER) + break; + match = match->next; + } +found: + if (match == NULL) { + try_group: + index = rhash(gid, GR_ROLE_GROUP, acl_role_set.r_size); + match = acl_role_set.r_hash[index]; + + while (match) { + if ((match->roletype & (GR_ROLE_DOMAIN | GR_ROLE_GROUP)) == (GR_ROLE_DOMAIN | GR_ROLE_GROUP)) { + for (x = 0; x < match->domain_child_num; x++) { + if (match->domain_children[x] == gid) + goto found2; + } + } else if (match->uidgid == gid && match->roletype & GR_ROLE_GROUP) + break; + match = match->next; + } +found2: + if (match == NULL) + match = default_role; + if (match->allowed_ips == NULL) + return match; + else { + for (ipp = match->allowed_ips; ipp; ipp = ipp->next) { + if (likely + ((ntohl(task->signal->curr_ip) & ipp->netmask) == + (ntohl(ipp->addr) & ipp->netmask))) + return match; + } + match = default_role; + } + } else if (match->allowed_ips == NULL) { + return match; + } else { + for (ipp = match->allowed_ips; ipp; ipp = ipp->next) { + if (likely + ((ntohl(task->signal->curr_ip) & ipp->netmask) == + (ntohl(ipp->addr) & ipp->netmask))) + return match; + } + goto try_group; + } + + return match; +} + +struct acl_subject_label * +lookup_acl_subj_label(const ino_t ino, const dev_t dev, + const struct acl_role_label *role) +{ + unsigned int index = fhash(ino, dev, role->subj_hash_size); + struct acl_subject_label *match; + + match = role->subj_hash[index]; + + while (match && (match->inode != ino || match->device != dev || + (match->mode & GR_DELETED))) { + match = match->next; + } + + if (match && !(match->mode & GR_DELETED)) + return match; + else + return NULL; +} + +static struct acl_object_label * +lookup_acl_obj_label(const ino_t ino, const dev_t dev, + const struct acl_subject_label *subj) +{ + unsigned int index = fhash(ino, dev, subj->obj_hash_size); + struct acl_object_label *match; + + match = subj->obj_hash[index]; + + while (match && (match->inode != ino || match->device != dev || + (match->mode & GR_DELETED))) { + match = match->next; + } + + if (match && !(match->mode & GR_DELETED)) + return match; + else + return NULL; +} + +static struct acl_object_label * +lookup_acl_obj_label_create(const ino_t ino, const dev_t dev, + const struct acl_subject_label *subj) +{ + unsigned int index = fhash(ino, dev, subj->obj_hash_size); + struct acl_object_label *match; + + match = subj->obj_hash[index]; + + while (match && (match->inode != ino || match->device != dev || + !(match->mode & GR_DELETED))) { + match = match->next; + } + + if (match && (match->mode & GR_DELETED)) + return match; + + match = subj->obj_hash[index]; + + while (match && (match->inode != ino || match->device != dev || + (match->mode & GR_DELETED))) { + match = match->next; + } + + if (match && !(match->mode & GR_DELETED)) + return match; + else + return NULL; +} + +static struct name_entry * +lookup_name_entry(const char *name) +{ + unsigned int len = strlen(name); + unsigned int key = full_name_hash(name, len); + unsigned int index = key % name_set.n_size; + struct name_entry *match; + + match = name_set.n_hash[index]; + + while (match && (match->key != key || !gr_streq(match->name, name, match->len, len))) + match = match->next; + + return match; +} + +static struct inodev_entry * +lookup_inodev_entry(const ino_t ino, const dev_t dev) +{ + unsigned int index = fhash(ino, dev, inodev_set.i_size); + struct inodev_entry *match; + + match = inodev_set.i_hash[index]; + + while (match && (match->nentry->inode != ino || match->nentry->device != dev)) + match = match->next; + + return match; +} + +static void +insert_inodev_entry(struct inodev_entry *entry) +{ + unsigned int index = fhash(entry->nentry->inode, entry->nentry->device, + inodev_set.i_size); + struct inodev_entry **curr; + + entry->prev = NULL; + + curr = &inodev_set.i_hash[index]; + if (*curr != NULL) + (*curr)->prev = entry; + + entry->next = *curr; + *curr = entry; + + return; +} + +static void +__insert_acl_role_label(struct acl_role_label *role, uid_t uidgid) +{ + unsigned int index = + rhash(uidgid, role->roletype & (GR_ROLE_USER | GR_ROLE_GROUP), acl_role_set.r_size); + struct acl_role_label **curr; + + role->prev = NULL; + + curr = &acl_role_set.r_hash[index]; + if (*curr != NULL) + (*curr)->prev = role; + + role->next = *curr; + *curr = role; + + return; +} + +static void +insert_acl_role_label(struct acl_role_label *role) +{ + int i; + + if (role->roletype & GR_ROLE_DOMAIN) { + for (i = 0; i < role->domain_child_num; i++) + __insert_acl_role_label(role, role->domain_children[i]); + } else + __insert_acl_role_label(role, role->uidgid); +} + +static int +insert_name_entry(char *name, const ino_t inode, const dev_t device) +{ + struct name_entry **curr, *nentry; + struct inodev_entry *ientry; + unsigned int len = strlen(name); + unsigned int key = full_name_hash(name, len); + unsigned int index = key % name_set.n_size; + + curr = &name_set.n_hash[index]; + + while (*curr && ((*curr)->key != key || !gr_streq((*curr)->name, name, (*curr)->len, len))) + curr = &((*curr)->next); + + if (*curr != NULL) + return 1; + + nentry = acl_alloc(sizeof (struct name_entry)); + if (nentry == NULL) + return 0; + ientry = acl_alloc(sizeof (struct inodev_entry)); + if (ientry == NULL) + return 0; + ientry->nentry = nentry; + + nentry->key = key; + nentry->name = name; + nentry->inode = inode; + nentry->device = device; + nentry->len = len; + + nentry->prev = NULL; + curr = &name_set.n_hash[index]; + if (*curr != NULL) + (*curr)->prev = nentry; + nentry->next = *curr; + *curr = nentry; + + /* insert us into the table searchable by inode/dev */ + insert_inodev_entry(ientry); + + return 1; +} + +static void +insert_acl_obj_label(struct acl_object_label *obj, + struct acl_subject_label *subj) +{ + unsigned int index = + fhash(obj->inode, obj->device, subj->obj_hash_size); + struct acl_object_label **curr; + + + obj->prev = NULL; + + curr = &subj->obj_hash[index]; + if (*curr != NULL) + (*curr)->prev = obj; + + obj->next = *curr; + *curr = obj; + + return; +} + +static void +insert_acl_subj_label(struct acl_subject_label *obj, + struct acl_role_label *role) +{ + unsigned int index = fhash(obj->inode, obj->device, role->subj_hash_size); + struct acl_subject_label **curr; + + obj->prev = NULL; + + curr = &role->subj_hash[index]; + if (*curr != NULL) + (*curr)->prev = obj; + + obj->next = *curr; + *curr = obj; + + return; +} + +/* allocating chained hash tables, so optimal size is where lambda ~ 1 */ + +static void * +create_table(__u32 * len, int elementsize) +{ + unsigned int table_sizes[] = { + 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, + 32749, 65521, 131071, 262139, 524287, 1048573, 2097143, + 4194301, 8388593, 16777213, 33554393, 67108859, 134217689, + 268435399, 536870909, 1073741789, 2147483647 + }; + void *newtable = NULL; + unsigned int pwr = 0; + + while ((pwr < ((sizeof (table_sizes) / sizeof (table_sizes[0])) - 1)) && + table_sizes[pwr] <= *len) + pwr++; + + if (table_sizes[pwr] <= *len) + return newtable; + + if ((table_sizes[pwr] * elementsize) <= PAGE_SIZE) + newtable = + kmalloc(table_sizes[pwr] * elementsize, GFP_KERNEL); + else + newtable = vmalloc(table_sizes[pwr] * elementsize); + + *len = table_sizes[pwr]; + + return newtable; +} + +static int +init_variables(const struct gr_arg *arg) +{ + unsigned int stacksize; + + subj_map_set.s_size = arg->role_db.num_subjects; + acl_role_set.r_size = arg->role_db.num_roles + arg->role_db.num_domain_children; + name_set.n_size = arg->role_db.num_objects; + inodev_set.i_size = arg->role_db.num_objects; + + if (!subj_map_set.s_size || !acl_role_set.r_size || + !name_set.n_size || !inodev_set.i_size) + return 1; + + if (!gr_init_uidset()) + return 1; + + /* set up the stack that holds allocation info */ + + stacksize = arg->role_db.num_pointers + 5; + + if (!acl_alloc_stack_init(stacksize)) + return 1; + + /* grab reference for the real root dentry and vfsmount */ + read_lock(&child_reaper->fs->lock); + real_root_mnt = mntget(child_reaper->fs->rootmnt); + real_root = dget(child_reaper->fs->root); + read_unlock(&child_reaper->fs->lock); + + fakefs_obj = acl_alloc(sizeof(struct acl_object_label)); + if (fakefs_obj == NULL) + return 1; + fakefs_obj->mode = GR_FIND | GR_READ | GR_WRITE | GR_EXEC; + + subj_map_set.s_hash = + (struct subject_map **) create_table(&subj_map_set.s_size, sizeof(void *)); + acl_role_set.r_hash = + (struct acl_role_label **) create_table(&acl_role_set.r_size, sizeof(void *)); + name_set.n_hash = (struct name_entry **) create_table(&name_set.n_size, sizeof(void *)); + inodev_set.i_hash = + (struct inodev_entry **) create_table(&inodev_set.i_size, sizeof(void *)); + + if (!subj_map_set.s_hash || !acl_role_set.r_hash || + !name_set.n_hash || !inodev_set.i_hash) + return 1; + + memset(subj_map_set.s_hash, 0, + sizeof(struct subject_map *) * subj_map_set.s_size); + memset(acl_role_set.r_hash, 0, + sizeof (struct acl_role_label *) * acl_role_set.r_size); + memset(name_set.n_hash, 0, + sizeof (struct name_entry *) * name_set.n_size); + memset(inodev_set.i_hash, 0, + sizeof (struct inodev_entry *) * inodev_set.i_size); + + return 0; +} + +/* free information not needed after startup + currently contains user->kernel pointer mappings for subjects +*/ + +static void +free_init_variables(void) +{ + __u32 i; + + if (subj_map_set.s_hash) { + for (i = 0; i < subj_map_set.s_size; i++) { + if (subj_map_set.s_hash[i]) { + kfree(subj_map_set.s_hash[i]); + subj_map_set.s_hash[i] = NULL; + } + } + + if ((subj_map_set.s_size * sizeof (struct subject_map *)) <= + PAGE_SIZE) + kfree(subj_map_set.s_hash); + else + vfree(subj_map_set.s_hash); + } + + return; +} + +static void +free_variables(void) +{ + struct acl_subject_label *s; + struct acl_role_label *r; + struct task_struct *task, *task2; + unsigned int i, x; + + gr_clear_learn_entries(); + + read_lock(&tasklist_lock); + do_each_thread(task2, task) { + task->acl_sp_role = 0; + task->acl_role_id = 0; + task->acl = NULL; + task->role = NULL; + } while_each_thread(task2, task); + read_unlock(&tasklist_lock); + + /* release the reference to the real root dentry and vfsmount */ + if (real_root) + dput(real_root); + real_root = NULL; + if (real_root_mnt) + mntput(real_root_mnt); + real_root_mnt = NULL; + + /* free all object hash tables */ + + FOR_EACH_ROLE_START(r, i) + if (r->subj_hash == NULL) + break; + FOR_EACH_SUBJECT_START(r, s, x) + if (s->obj_hash == NULL) + break; + if ((s->obj_hash_size * sizeof (struct acl_object_label *)) <= PAGE_SIZE) + kfree(s->obj_hash); + else + vfree(s->obj_hash); + FOR_EACH_SUBJECT_END(s, x) + FOR_EACH_NESTED_SUBJECT_START(r, s) + if (s->obj_hash == NULL) + break; + if ((s->obj_hash_size * sizeof (struct acl_object_label *)) <= PAGE_SIZE) + kfree(s->obj_hash); + else + vfree(s->obj_hash); + FOR_EACH_NESTED_SUBJECT_END(s) + if ((r->subj_hash_size * sizeof (struct acl_subject_label *)) <= PAGE_SIZE) + kfree(r->subj_hash); + else + vfree(r->subj_hash); + r->subj_hash = NULL; + FOR_EACH_ROLE_END(r,i) + + acl_free_all(); + + if (acl_role_set.r_hash) { + if ((acl_role_set.r_size * sizeof (struct acl_role_label *)) <= + PAGE_SIZE) + kfree(acl_role_set.r_hash); + else + vfree(acl_role_set.r_hash); + } + if (name_set.n_hash) { + if ((name_set.n_size * sizeof (struct name_entry *)) <= + PAGE_SIZE) + kfree(name_set.n_hash); + else + vfree(name_set.n_hash); + } + + if (inodev_set.i_hash) { + if ((inodev_set.i_size * sizeof (struct inodev_entry *)) <= + PAGE_SIZE) + kfree(inodev_set.i_hash); + else + vfree(inodev_set.i_hash); + } + + gr_free_uidset(); + + memset(&name_set, 0, sizeof (struct name_db)); + memset(&inodev_set, 0, sizeof (struct inodev_db)); + memset(&acl_role_set, 0, sizeof (struct acl_role_db)); + memset(&subj_map_set, 0, sizeof (struct acl_subj_map_db)); + + default_role = NULL; + + return; +} + +static __u32 +count_user_objs(struct acl_object_label *userp) +{ + struct acl_object_label o_tmp; + __u32 num = 0; + + while (userp) { + if (copy_from_user(&o_tmp, userp, + sizeof (struct acl_object_label))) + break; + + userp = o_tmp.prev; + num++; + } + + return num; +} + +static struct acl_subject_label * +do_copy_user_subj(struct acl_subject_label *userp, struct acl_role_label *role); + +static int +copy_user_glob(struct acl_object_label *obj) +{ + struct acl_object_label *g_tmp, **guser; + unsigned int len; + char *tmp; + + if (obj->globbed == NULL) + return 0; + + guser = &obj->globbed; + while (*guser) { + g_tmp = (struct acl_object_label *) + acl_alloc(sizeof (struct acl_object_label)); + if (g_tmp == NULL) + return -ENOMEM; + + if (copy_from_user(g_tmp, *guser, + sizeof (struct acl_object_label))) + return -EFAULT; + + len = strnlen_user(g_tmp->filename, PATH_MAX); + + if (!len || len >= PATH_MAX) + return -EINVAL; + + if ((tmp = (char *) acl_alloc(len)) == NULL) + return -ENOMEM; + + if (copy_from_user(tmp, g_tmp->filename, len)) + return -EFAULT; + + g_tmp->filename = tmp; + + *guser = g_tmp; + guser = &(g_tmp->next); + } + + return 0; +} + +static int +copy_user_objs(struct acl_object_label *userp, struct acl_subject_label *subj, + struct acl_role_label *role) +{ + struct acl_object_label *o_tmp; + unsigned int len; + int ret; + char *tmp; + + while (userp) { + if ((o_tmp = (struct acl_object_label *) + acl_alloc(sizeof (struct acl_object_label))) == NULL) + return -ENOMEM; + + if (copy_from_user(o_tmp, userp, + sizeof (struct acl_object_label))) + return -EFAULT; + + userp = o_tmp->prev; + + len = strnlen_user(o_tmp->filename, PATH_MAX); + + if (!len || len >= PATH_MAX) + return -EINVAL; + + if ((tmp = (char *) acl_alloc(len)) == NULL) + return -ENOMEM; + + if (copy_from_user(tmp, o_tmp->filename, len)) + return -EFAULT; + + o_tmp->filename = tmp; + + insert_acl_obj_label(o_tmp, subj); + if (!insert_name_entry(o_tmp->filename, o_tmp->inode, + o_tmp->device)) + return -ENOMEM; + + ret = copy_user_glob(o_tmp); + if (ret) + return ret; + + if (o_tmp->nested) { + o_tmp->nested = do_copy_user_subj(o_tmp->nested, role); + if (IS_ERR(o_tmp->nested)) + return PTR_ERR(o_tmp->nested); + + /* insert into nested subject list */ + o_tmp->nested->next = role->hash->first; + role->hash->first = o_tmp->nested; + } + } + + return 0; +} + +static __u32 +count_user_subjs(struct acl_subject_label *userp) +{ + struct acl_subject_label s_tmp; + __u32 num = 0; + + while (userp) { + if (copy_from_user(&s_tmp, userp, + sizeof (struct acl_subject_label))) + break; + + userp = s_tmp.prev; + /* do not count nested subjects against this count, since + they are not included in the hash table, but are + attached to objects. We have already counted + the subjects in userspace for the allocation + stack + */ + if (!(s_tmp.mode & GR_NESTED)) + num++; + } + + return num; +} + +static int +copy_user_allowedips(struct acl_role_label *rolep) +{ + struct role_allowed_ip *ruserip, *rtmp = NULL, *rlast; + + ruserip = rolep->allowed_ips; + + while (ruserip) { + rlast = rtmp; + + if ((rtmp = (struct role_allowed_ip *) + acl_alloc(sizeof (struct role_allowed_ip))) == NULL) + return -ENOMEM; + + if (copy_from_user(rtmp, ruserip, + sizeof (struct role_allowed_ip))) + return -EFAULT; + + ruserip = rtmp->prev; + + if (!rlast) { + rtmp->prev = NULL; + rolep->allowed_ips = rtmp; + } else { + rlast->next = rtmp; + rtmp->prev = rlast; + } + + if (!ruserip) + rtmp->next = NULL; + } + + return 0; +} + +static int +copy_user_transitions(struct acl_role_label *rolep) +{ + struct role_transition *rusertp, *rtmp = NULL, *rlast; + + unsigned int len; + char *tmp; + + rusertp = rolep->transitions; + + while (rusertp) { + rlast = rtmp; + + if ((rtmp = (struct role_transition *) + acl_alloc(sizeof (struct role_transition))) == NULL) + return -ENOMEM; + + if (copy_from_user(rtmp, rusertp, + sizeof (struct role_transition))) + return -EFAULT; + + rusertp = rtmp->prev; + + len = strnlen_user(rtmp->rolename, GR_SPROLE_LEN); + + if (!len || len >= GR_SPROLE_LEN) + return -EINVAL; + + if ((tmp = (char *) acl_alloc(len)) == NULL) + return -ENOMEM; + + if (copy_from_user(tmp, rtmp->rolename, len)) + return -EFAULT; + + rtmp->rolename = tmp; + + if (!rlast) { + rtmp->prev = NULL; + rolep->transitions = rtmp; + } else { + rlast->next = rtmp; + rtmp->prev = rlast; + } + + if (!rusertp) + rtmp->next = NULL; + } + + return 0; +} + +static struct acl_subject_label * +do_copy_user_subj(struct acl_subject_label *userp, struct acl_role_label *role) +{ + struct acl_subject_label *s_tmp = NULL, *s_tmp2; + unsigned int len; + char *tmp; + __u32 num_objs; + struct acl_ip_label **i_tmp, *i_utmp2; + struct gr_hash_struct ghash; + struct subject_map *subjmap; + unsigned int i_num; + int err; + + s_tmp = lookup_subject_map(userp); + + /* we've already copied this subject into the kernel, just return + the reference to it, and don't copy it over again + */ + if (s_tmp) + return(s_tmp); + + if ((s_tmp = (struct acl_subject_label *) + acl_alloc(sizeof (struct acl_subject_label))) == NULL) + return ERR_PTR(-ENOMEM); + + subjmap = (struct subject_map *)kmalloc(sizeof (struct subject_map), GFP_KERNEL); + if (subjmap == NULL) + return ERR_PTR(-ENOMEM); + + subjmap->user = userp; + subjmap->kernel = s_tmp; + insert_subj_map_entry(subjmap); + + if (copy_from_user(s_tmp, userp, + sizeof (struct acl_subject_label))) + return ERR_PTR(-EFAULT); + + len = strnlen_user(s_tmp->filename, PATH_MAX); + + if (!len || len >= PATH_MAX) + return ERR_PTR(-EINVAL); + + if ((tmp = (char *) acl_alloc(len)) == NULL) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(tmp, s_tmp->filename, len)) + return ERR_PTR(-EFAULT); + + s_tmp->filename = tmp; + + if (!strcmp(s_tmp->filename, "/")) + role->root_label = s_tmp; + + if (copy_from_user(&ghash, s_tmp->hash, sizeof(struct gr_hash_struct))) + return ERR_PTR(-EFAULT); + + /* copy user and group transition tables */ + + if (s_tmp->user_trans_num) { + uid_t *uidlist; + + uidlist = (uid_t *)acl_alloc(s_tmp->user_trans_num * sizeof(uid_t)); + if (uidlist == NULL) + return ERR_PTR(-ENOMEM); + if (copy_from_user(uidlist, s_tmp->user_transitions, s_tmp->user_trans_num * sizeof(uid_t))) + return ERR_PTR(-EFAULT); + + s_tmp->user_transitions = uidlist; + } + + if (s_tmp->group_trans_num) { + gid_t *gidlist; + + gidlist = (gid_t *)acl_alloc(s_tmp->group_trans_num * sizeof(gid_t)); + if (gidlist == NULL) + return ERR_PTR(-ENOMEM); + if (copy_from_user(gidlist, s_tmp->group_transitions, s_tmp->group_trans_num * sizeof(gid_t))) + return ERR_PTR(-EFAULT); + + s_tmp->group_transitions = gidlist; + } + + /* set up object hash table */ + num_objs = count_user_objs(ghash.first); + + s_tmp->obj_hash_size = num_objs; + s_tmp->obj_hash = + (struct acl_object_label **) + create_table(&(s_tmp->obj_hash_size), sizeof(void *)); + + if (!s_tmp->obj_hash) + return ERR_PTR(-ENOMEM); + + memset(s_tmp->obj_hash, 0, + s_tmp->obj_hash_size * + sizeof (struct acl_object_label *)); + + /* add in objects */ + err = copy_user_objs(ghash.first, s_tmp, role); + + if (err) + return ERR_PTR(err); + + /* set pointer for parent subject */ + if (s_tmp->parent_subject) { + s_tmp2 = do_copy_user_subj(s_tmp->parent_subject, role); + + if (IS_ERR(s_tmp2)) + return s_tmp2; + + s_tmp->parent_subject = s_tmp2; + } + + /* add in ip acls */ + + if (!s_tmp->ip_num) { + s_tmp->ips = NULL; + goto insert; + } + + i_tmp = + (struct acl_ip_label **) acl_alloc(s_tmp->ip_num * + sizeof (struct + acl_ip_label *)); + + if (!i_tmp) + return ERR_PTR(-ENOMEM); + + for (i_num = 0; i_num < s_tmp->ip_num; i_num++) { + *(i_tmp + i_num) = + (struct acl_ip_label *) + acl_alloc(sizeof (struct acl_ip_label)); + if (!*(i_tmp + i_num)) + return ERR_PTR(-ENOMEM); + + if (copy_from_user + (&i_utmp2, s_tmp->ips + i_num, + sizeof (struct acl_ip_label *))) + return ERR_PTR(-EFAULT); + + if (copy_from_user + (*(i_tmp + i_num), i_utmp2, + sizeof (struct acl_ip_label))) + return ERR_PTR(-EFAULT); + + if ((*(i_tmp + i_num))->iface == NULL) + continue; + + len = strnlen_user((*(i_tmp + i_num))->iface, IFNAMSIZ); + if (!len || len >= IFNAMSIZ) + return ERR_PTR(-EINVAL); + tmp = acl_alloc(len); + if (tmp == NULL) + return ERR_PTR(-ENOMEM); + if (copy_from_user(tmp, (*(i_tmp + i_num))->iface, len)) + return ERR_PTR(-EFAULT); + (*(i_tmp + i_num))->iface = tmp; + } + + s_tmp->ips = i_tmp; + +insert: + if (!insert_name_entry(s_tmp->filename, s_tmp->inode, + s_tmp->device)) + return ERR_PTR(-ENOMEM); + + return s_tmp; +} + +static int +copy_user_subjs(struct acl_subject_label *userp, struct acl_role_label *role) +{ + struct acl_subject_label s_pre; + struct acl_subject_label * ret; + int err; + + while (userp) { + if (copy_from_user(&s_pre, userp, + sizeof (struct acl_subject_label))) + return -EFAULT; + + /* do not add nested subjects here, add + while parsing objects + */ + + if (s_pre.mode & GR_NESTED) { + userp = s_pre.prev; + continue; + } + + ret = do_copy_user_subj(userp, role); + + err = PTR_ERR(ret); + if (IS_ERR(ret)) + return err; + + insert_acl_subj_label(ret, role); + + userp = s_pre.prev; + } + + return 0; +} + +static int +copy_user_acl(struct gr_arg *arg) +{ + struct acl_role_label *r_tmp = NULL, **r_utmp, *r_utmp2; + struct sprole_pw *sptmp; + struct gr_hash_struct *ghash; + uid_t *domainlist; + unsigned int r_num; + unsigned int len; + char *tmp; + int err = 0; + __u16 i; + __u32 num_subjs; + + /* we need a default and kernel role */ + if (arg->role_db.num_roles < 2) + return -EINVAL; + + /* copy special role authentication info from userspace */ + + num_sprole_pws = arg->num_sprole_pws; + acl_special_roles = (struct sprole_pw **) acl_alloc(num_sprole_pws * sizeof(struct sprole_pw *)); + + if (!acl_special_roles) { + err = -ENOMEM; + goto cleanup; + } + + for (i = 0; i < num_sprole_pws; i++) { + sptmp = (struct sprole_pw *) acl_alloc(sizeof(struct sprole_pw)); + if (!sptmp) { + err = -ENOMEM; + goto cleanup; + } + if (copy_from_user(sptmp, arg->sprole_pws + i, + sizeof (struct sprole_pw))) { + err = -EFAULT; + goto cleanup; + } + + len = + strnlen_user(sptmp->rolename, GR_SPROLE_LEN); + + if (!len || len >= GR_SPROLE_LEN) { + err = -EINVAL; + goto cleanup; + } + + if ((tmp = (char *) acl_alloc(len)) == NULL) { + err = -ENOMEM; + goto cleanup; + } + + if (copy_from_user(tmp, sptmp->rolename, len)) { + err = -EFAULT; + goto cleanup; + } + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "Copying special role %s\n", tmp); +#endif + sptmp->rolename = tmp; + acl_special_roles[i] = sptmp; + } + + r_utmp = (struct acl_role_label **) arg->role_db.r_table; + + for (r_num = 0; r_num < arg->role_db.num_roles; r_num++) { + r_tmp = acl_alloc(sizeof (struct acl_role_label)); + + if (!r_tmp) { + err = -ENOMEM; + goto cleanup; + } + + if (copy_from_user(&r_utmp2, r_utmp + r_num, + sizeof (struct acl_role_label *))) { + err = -EFAULT; + goto cleanup; + } + + if (copy_from_user(r_tmp, r_utmp2, + sizeof (struct acl_role_label))) { + err = -EFAULT; + goto cleanup; + } + + len = strnlen_user(r_tmp->rolename, GR_SPROLE_LEN); + + if (!len || len >= PATH_MAX) { + err = -EINVAL; + goto cleanup; + } + + if ((tmp = (char *) acl_alloc(len)) == NULL) { + err = -ENOMEM; + goto cleanup; + } + if (copy_from_user(tmp, r_tmp->rolename, len)) { + err = -EFAULT; + goto cleanup; + } + r_tmp->rolename = tmp; + + if (!strcmp(r_tmp->rolename, "default") + && (r_tmp->roletype & GR_ROLE_DEFAULT)) { + default_role = r_tmp; + } else if (!strcmp(r_tmp->rolename, ":::kernel:::")) { + kernel_role = r_tmp; + } + + if ((ghash = (struct gr_hash_struct *) acl_alloc(sizeof(struct gr_hash_struct))) == NULL) { + err = -ENOMEM; + goto cleanup; + } + if (copy_from_user(ghash, r_tmp->hash, sizeof(struct gr_hash_struct))) { + err = -EFAULT; + goto cleanup; + } + + r_tmp->hash = ghash; + + num_subjs = count_user_subjs(r_tmp->hash->first); + + r_tmp->subj_hash_size = num_subjs; + r_tmp->subj_hash = + (struct acl_subject_label **) + create_table(&(r_tmp->subj_hash_size), sizeof(void *)); + + if (!r_tmp->subj_hash) { + err = -ENOMEM; + goto cleanup; + } + + err = copy_user_allowedips(r_tmp); + if (err) + goto cleanup; + + /* copy domain info */ + if (r_tmp->domain_children != NULL) { + domainlist = acl_alloc(r_tmp->domain_child_num * sizeof(uid_t)); + if (domainlist == NULL) { + err = -ENOMEM; + goto cleanup; + } + if (copy_from_user(domainlist, r_tmp->domain_children, r_tmp->domain_child_num * sizeof(uid_t))) { + err = -EFAULT; + goto cleanup; + } + r_tmp->domain_children = domainlist; + } + + err = copy_user_transitions(r_tmp); + if (err) + goto cleanup; + + memset(r_tmp->subj_hash, 0, + r_tmp->subj_hash_size * + sizeof (struct acl_subject_label *)); + + err = copy_user_subjs(r_tmp->hash->first, r_tmp); + + if (err) + goto cleanup; + + /* set nested subject list to null */ + r_tmp->hash->first = NULL; + + insert_acl_role_label(r_tmp); + } + + goto return_err; + cleanup: + free_variables(); + return_err: + return err; + +} + +static int +gracl_init(struct gr_arg *args) +{ + int error = 0; + + memcpy(gr_system_salt, args->salt, GR_SALT_LEN); + memcpy(gr_system_sum, args->sum, GR_SHA_LEN); + + if (init_variables(args)) { + gr_log_str(GR_DONT_AUDIT_GOOD, GR_INITF_ACL_MSG, GR_VERSION); + error = -ENOMEM; + free_variables(); + goto out; + } + + error = copy_user_acl(args); + free_init_variables(); + if (error) { + free_variables(); + goto out; + } + + if ((error = gr_set_acls(0))) { + free_variables(); + goto out; + } + + gr_status |= GR_READY; + out: + return error; +} + +/* derived from glibc fnmatch() 0: match, 1: no match*/ + +static int +glob_match(const char *p, const char *n) +{ + char c; + + while ((c = *p++) != '\0') { + switch (c) { + case '?': + if (*n == '\0') + return 1; + else if (*n == '/') + return 1; + break; + case '\\': + if (*n != c) + return 1; + break; + case '*': + for (c = *p++; c == '?' || c == '*'; c = *p++) { + if (*n == '/') + return 1; + else if (c == '?') { + if (*n == '\0') + return 1; + else + ++n; + } + } + if (c == '\0') { + return 0; + } else { + const char *endp; + + if ((endp = strchr(n, '/')) == NULL) + endp = n + strlen(n); + + if (c == '[') { + for (--p; n < endp; ++n) + if (!glob_match(p, n)) + return 0; + } else if (c == '/') { + while (*n != '\0' && *n != '/') + ++n; + if (*n == '/' && !glob_match(p, n + 1)) + return 0; + } else { + for (--p; n < endp; ++n) + if (*n == c && !glob_match(p, n)) + return 0; + } + + return 1; + } + case '[': + { + int not; + char cold; + + if (*n == '\0' || *n == '/') + return 1; + + not = (*p == '!' || *p == '^'); + if (not) + ++p; + + c = *p++; + for (;;) { + unsigned char fn = (unsigned char)*n; + + if (c == '\0') + return 1; + else { + if (c == fn) + goto matched; + cold = c; + c = *p++; + + if (c == '-' && *p != ']') { + unsigned char cend = *p++; + + if (cend == '\0') + return 1; + + if (cold <= fn && fn <= cend) + goto matched; + + c = *p++; + } + } + + if (c == ']') + break; + } + if (!not) + return 1; + break; + matched: + while (c != ']') { + if (c == '\0') + return 1; + + c = *p++; + } + if (not) + return 1; + } + break; + default: + if (c != *n) + return 1; + } + + ++n; + } + + if (*n == '\0') + return 0; + + if (*n == '/') + return 0; + + return 1; +} + +static struct acl_object_label * +chk_glob_label(struct acl_object_label *globbed, + struct dentry *dentry, struct vfsmount *mnt, char **path) +{ + struct acl_object_label *tmp; + + if (*path == NULL) + *path = gr_to_filename_nolock(dentry, mnt); + + tmp = globbed; + + while (tmp) { + if (!glob_match(tmp->filename, *path)) + return tmp; + tmp = tmp->next; + } + + return NULL; +} + +static struct acl_object_label * +__full_lookup(const struct dentry *orig_dentry, const struct vfsmount *orig_mnt, + const ino_t curr_ino, const dev_t curr_dev, + const struct acl_subject_label *subj, char **path) +{ + struct acl_subject_label *tmpsubj; + struct acl_object_label *retval; + struct acl_object_label *retval2; + + tmpsubj = (struct acl_subject_label *) subj; + read_lock(&gr_inode_lock); + do { + retval = lookup_acl_obj_label(curr_ino, curr_dev, tmpsubj); + if (retval) { + if (retval->globbed) { + retval2 = chk_glob_label(retval->globbed, (struct dentry *)orig_dentry, + (struct vfsmount *)orig_mnt, path); + if (retval2) + retval = retval2; + } + break; + } + } while ((tmpsubj = tmpsubj->parent_subject)); + read_unlock(&gr_inode_lock); + + return retval; +} + +static __inline__ struct acl_object_label * +full_lookup(const struct dentry *orig_dentry, const struct vfsmount *orig_mnt, + const struct dentry *curr_dentry, + const struct acl_subject_label *subj, char **path) +{ + return __full_lookup(orig_dentry, orig_mnt, + curr_dentry->d_inode->i_ino, + curr_dentry->d_inode->i_sb->s_dev, subj, path); +} + +static struct acl_object_label * +__chk_obj_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, + const struct acl_subject_label *subj, char *path) +{ + struct dentry *dentry = (struct dentry *) l_dentry; + struct vfsmount *mnt = (struct vfsmount *) l_mnt; + struct acl_object_label *retval; + + spin_lock(&dcache_lock); + + if (unlikely(mnt == shm_mnt || mnt == pipe_mnt || mnt == sock_mnt)) { + retval = fakefs_obj; + goto out; + } + + for (;;) { + if (dentry == real_root && mnt == real_root_mnt) + break; + + if (dentry == mnt->mnt_root || IS_ROOT(dentry)) { + if (mnt->mnt_parent == mnt) + break; + + retval = full_lookup(l_dentry, l_mnt, dentry, subj, &path); + if (retval != NULL) + goto out; + + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + continue; + } + + retval = full_lookup(l_dentry, l_mnt, dentry, subj, &path); + if (retval != NULL) + goto out; + + dentry = dentry->d_parent; + } + + retval = full_lookup(l_dentry, l_mnt, dentry, subj, &path); + + if (retval == NULL) + retval = full_lookup(l_dentry, l_mnt, real_root, subj, &path); +out: + spin_unlock(&dcache_lock); + return retval; +} + +static __inline__ struct acl_object_label * +chk_obj_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, + const struct acl_subject_label *subj) +{ + char *path = NULL; + return __chk_obj_label(l_dentry, l_mnt, subj, path); +} + +static __inline__ struct acl_object_label * +chk_obj_create_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, + const struct acl_subject_label *subj, char *path) +{ + return __chk_obj_label(l_dentry, l_mnt, subj, path); +} + +static struct acl_subject_label * +chk_subj_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, + const struct acl_role_label *role) +{ + struct dentry *dentry = (struct dentry *) l_dentry; + struct vfsmount *mnt = (struct vfsmount *) l_mnt; + struct acl_subject_label *retval; + + spin_lock(&dcache_lock); + + for (;;) { + if (dentry == real_root && mnt == real_root_mnt) + break; + if (dentry == mnt->mnt_root || IS_ROOT(dentry)) { + if (mnt->mnt_parent == mnt) + break; + + read_lock(&gr_inode_lock); + retval = + lookup_acl_subj_label(dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, role); + read_unlock(&gr_inode_lock); + if (retval != NULL) + goto out; + + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + continue; + } + + read_lock(&gr_inode_lock); + retval = lookup_acl_subj_label(dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, role); + read_unlock(&gr_inode_lock); + if (retval != NULL) + goto out; + + dentry = dentry->d_parent; + } + + read_lock(&gr_inode_lock); + retval = lookup_acl_subj_label(dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, role); + read_unlock(&gr_inode_lock); + + if (unlikely(retval == NULL)) { + read_lock(&gr_inode_lock); + retval = lookup_acl_subj_label(real_root->d_inode->i_ino, + real_root->d_inode->i_sb->s_dev, role); + read_unlock(&gr_inode_lock); + } +out: + spin_unlock(&dcache_lock); + + return retval; +} + +static void +gr_log_learn(const struct task_struct *task, const struct dentry *dentry, const struct vfsmount *mnt, const __u32 mode) +{ + security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, task->role->roletype, + task->uid, task->gid, task->exec_file ? gr_to_filename1(task->exec_file->f_dentry, + task->exec_file->f_vfsmnt) : task->acl->filename, task->acl->filename, + 1, 1, gr_to_filename(dentry, mnt), (unsigned long) mode, NIPQUAD(task->signal->curr_ip)); + + return; +} + +static void +gr_log_learn_id_change(const struct task_struct *task, const char type, const unsigned int real, + const unsigned int effective, const unsigned int fs) +{ + security_learn(GR_ID_LEARN_MSG, task->role->rolename, task->role->roletype, + task->uid, task->gid, task->exec_file ? gr_to_filename1(task->exec_file->f_dentry, + task->exec_file->f_vfsmnt) : task->acl->filename, task->acl->filename, + type, real, effective, fs, NIPQUAD(task->signal->curr_ip)); + + return; +} + +__u32 +gr_check_link(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, + const struct dentry * old_dentry, const struct vfsmount * old_mnt) +{ + struct acl_object_label *obj; + __u32 oldmode, newmode; + __u32 needmode; + + if (unlikely(!(gr_status & GR_READY))) + return (GR_CREATE | GR_LINK); + + obj = chk_obj_label(old_dentry, old_mnt, current->acl); + oldmode = obj->mode; + + if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) + oldmode |= (GR_CREATE | GR_LINK); + + needmode = GR_CREATE | GR_AUDIT_CREATE | GR_SUPPRESS; + if (old_dentry->d_inode->i_mode & (S_ISUID | S_ISGID)) + needmode |= GR_SETID | GR_AUDIT_SETID; + + newmode = + gr_check_create(new_dentry, parent_dentry, parent_mnt, + oldmode | needmode); + + needmode = newmode & (GR_FIND | GR_APPEND | GR_WRITE | GR_EXEC | + GR_SETID | GR_READ | GR_FIND | GR_DELETE | + GR_INHERIT | GR_AUDIT_INHERIT); + + if (old_dentry->d_inode->i_mode & (S_ISUID | S_ISGID) && !(newmode & GR_SETID)) + goto bad; + + if ((oldmode & needmode) != needmode) + goto bad; + + needmode = oldmode & (GR_NOPTRACE | GR_PTRACERD | GR_INHERIT | GR_AUDITS); + if ((newmode & needmode) != needmode) + goto bad; + + if ((newmode & (GR_CREATE | GR_LINK)) == (GR_CREATE | GR_LINK)) + return newmode; +bad: + needmode = oldmode; + if (old_dentry->d_inode->i_mode & (S_ISUID | S_ISGID)) + needmode |= GR_SETID; + + if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) { + gr_log_learn(current, old_dentry, old_mnt, needmode); + return (GR_CREATE | GR_LINK); + } else if (newmode & GR_SUPPRESS) + return GR_SUPPRESS; + else + return 0; +} + +__u32 +gr_search_file(const struct dentry * dentry, const __u32 mode, + const struct vfsmount * mnt) +{ + __u32 retval = mode; + struct acl_subject_label *curracl; + struct acl_object_label *currobj; + + if (unlikely(!(gr_status & GR_READY))) + return (mode & ~GR_AUDITS); + + curracl = current->acl; + + currobj = chk_obj_label(dentry, mnt, curracl); + retval = currobj->mode & mode; + + if (unlikely + ((curracl->mode & (GR_LEARN | GR_INHERITLEARN)) && !(mode & GR_NOPTRACE) + && (retval != (mode & ~(GR_AUDITS | GR_SUPPRESS))))) { + __u32 new_mode = mode; + + new_mode &= ~(GR_AUDITS | GR_SUPPRESS); + + retval = new_mode; + + if (new_mode & GR_EXEC && curracl->mode & GR_INHERITLEARN) + new_mode |= GR_INHERIT; + + if (!(mode & GR_NOLEARN)) + gr_log_learn(current, dentry, mnt, new_mode); + } + + return retval; +} + +__u32 +gr_check_create(const struct dentry * new_dentry, const struct dentry * parent, + const struct vfsmount * mnt, const __u32 mode) +{ + struct name_entry *match; + struct acl_object_label *matchpo; + struct acl_subject_label *curracl; + char *path; + __u32 retval; + + if (unlikely(!(gr_status & GR_READY))) + return (mode & ~GR_AUDITS); + + preempt_disable(); + path = gr_to_filename_rbac(new_dentry, mnt); + match = lookup_name_entry(path); + + if (!match) + goto check_parent; + + curracl = current->acl; + + read_lock(&gr_inode_lock); + matchpo = lookup_acl_obj_label_create(match->inode, match->device, curracl); + read_unlock(&gr_inode_lock); + + if (matchpo) { + if ((matchpo->mode & mode) != + (mode & ~(GR_AUDITS | GR_SUPPRESS)) + && curracl->mode & (GR_LEARN | GR_INHERITLEARN)) { + __u32 new_mode = mode; + + new_mode &= ~(GR_AUDITS | GR_SUPPRESS); + + gr_log_learn(current, new_dentry, mnt, new_mode); + + preempt_enable(); + return new_mode; + } + preempt_enable(); + return (matchpo->mode & mode); + } + + check_parent: + curracl = current->acl; + + matchpo = chk_obj_create_label(parent, mnt, curracl, path); + retval = matchpo->mode & mode; + + if ((retval != (mode & ~(GR_AUDITS | GR_SUPPRESS))) + && (curracl->mode & (GR_LEARN | GR_INHERITLEARN))) { + __u32 new_mode = mode; + + new_mode &= ~(GR_AUDITS | GR_SUPPRESS); + + gr_log_learn(current, new_dentry, mnt, new_mode); + preempt_enable(); + return new_mode; + } + + preempt_enable(); + return retval; +} + +int +gr_check_hidden_task(const struct task_struct *task) +{ + if (unlikely(!(gr_status & GR_READY))) + return 0; + + if (!(task->acl->mode & GR_PROCFIND) && !(current->acl->mode & GR_VIEW)) + return 1; + + return 0; +} + +int +gr_check_protected_task(const struct task_struct *task) +{ + if (unlikely(!(gr_status & GR_READY) || !task)) + return 0; + + if ((task->acl->mode & GR_PROTECTED) && !(current->acl->mode & GR_KILL) && + task->acl != current->acl) + return 1; + + return 0; +} + +void +gr_copy_label(struct task_struct *tsk) +{ + tsk->signal->used_accept = 0; + tsk->acl_sp_role = 0; + tsk->acl_role_id = current->acl_role_id; + tsk->acl = current->acl; + tsk->role = current->role; + tsk->signal->curr_ip = current->signal->curr_ip; + if (current->exec_file) + get_file(current->exec_file); + tsk->exec_file = current->exec_file; + tsk->is_writable = current->is_writable; + if (unlikely(current->signal->used_accept)) + current->signal->curr_ip = 0; + + return; +} + +static void +gr_set_proc_res(struct task_struct *task) +{ + struct acl_subject_label *proc; + unsigned short i; + + proc = task->acl; + + if (proc->mode & (GR_LEARN | GR_INHERITLEARN)) + return; + + for (i = 0; i < (GR_NLIMITS - 1); i++) { + if (!(proc->resmask & (1 << i))) + continue; + + task->signal->rlim[i].rlim_cur = proc->res[i].rlim_cur; + task->signal->rlim[i].rlim_max = proc->res[i].rlim_max; + } + + return; +} + +int +gr_check_user_change(int real, int effective, int fs) +{ + unsigned int i; + __u16 num; + uid_t *uidlist; + int curuid; + int realok = 0; + int effectiveok = 0; + int fsok = 0; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) + gr_log_learn_id_change(current, 'u', real, effective, fs); + + num = current->acl->user_trans_num; + uidlist = current->acl->user_transitions; + + if (uidlist == NULL) + return 0; + + if (real == -1) + realok = 1; + if (effective == -1) + effectiveok = 1; + if (fs == -1) + fsok = 1; + + if (current->acl->user_trans_type & GR_ID_ALLOW) { + for (i = 0; i < num; i++) { + curuid = (int)uidlist[i]; + if (real == curuid) + realok = 1; + if (effective == curuid) + effectiveok = 1; + if (fs == curuid) + fsok = 1; + } + } else if (current->acl->user_trans_type & GR_ID_DENY) { + for (i = 0; i < num; i++) { + curuid = (int)uidlist[i]; + if (real == curuid) + break; + if (effective == curuid) + break; + if (fs == curuid) + break; + } + /* not in deny list */ + if (i == num) { + realok = 1; + effectiveok = 1; + fsok = 1; + } + } + + if (realok && effectiveok && fsok) + return 0; + else { + gr_log_int(GR_DONT_AUDIT, GR_USRCHANGE_ACL_MSG, realok ? (effectiveok ? (fsok ? 0 : fs) : effective) : real); + return 1; + } +} + +int +gr_check_group_change(int real, int effective, int fs) +{ + unsigned int i; + __u16 num; + gid_t *gidlist; + int curgid; + int realok = 0; + int effectiveok = 0; + int fsok = 0; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) + gr_log_learn_id_change(current, 'g', real, effective, fs); + + num = current->acl->group_trans_num; + gidlist = current->acl->group_transitions; + + if (gidlist == NULL) + return 0; + + if (real == -1) + realok = 1; + if (effective == -1) + effectiveok = 1; + if (fs == -1) + fsok = 1; + + if (current->acl->group_trans_type & GR_ID_ALLOW) { + for (i = 0; i < num; i++) { + curgid = (int)gidlist[i]; + if (real == curgid) + realok = 1; + if (effective == curgid) + effectiveok = 1; + if (fs == curgid) + fsok = 1; + } + } else if (current->acl->group_trans_type & GR_ID_DENY) { + for (i = 0; i < num; i++) { + curgid = (int)gidlist[i]; + if (real == curgid) + break; + if (effective == curgid) + break; + if (fs == curgid) + break; + } + /* not in deny list */ + if (i == num) { + realok = 1; + effectiveok = 1; + fsok = 1; + } + } + + if (realok && effectiveok && fsok) + return 0; + else { + gr_log_int(GR_DONT_AUDIT, GR_GRPCHANGE_ACL_MSG, realok ? (effectiveok ? (fsok ? 0 : fs) : effective) : real); + return 1; + } +} + +void +gr_set_role_label(struct task_struct *task, const uid_t uid, const uid_t gid) +{ + struct acl_role_label *role = task->role; + struct acl_subject_label *subj = NULL; + struct acl_object_label *obj; + struct file *filp; + + if (unlikely(!(gr_status & GR_READY))) + return; + + filp = task->exec_file; + + /* kernel process, we'll give them the kernel role */ + if (unlikely(!filp)) { + task->role = kernel_role; + task->acl = kernel_role->root_label; + return; + } else if (!task->role || !(task->role->roletype & GR_ROLE_SPECIAL)) + role = lookup_acl_role_label(task, uid, gid); + + /* perform subject lookup in possibly new role + we can use this result below in the case where role == task->role + */ + subj = chk_subj_label(filp->f_dentry, filp->f_vfsmnt, role); + + /* if we changed uid/gid, but result in the same role + and are using inheritance, don't lose the inherited subject + if current subject is other than what normal lookup + would result in, we arrived via inheritance, don't + lose subject + */ + if (role != task->role || (!(task->acl->mode & GR_INHERITLEARN) && + (subj == task->acl))) + task->acl = subj; + + task->role = role; + + task->is_writable = 0; + + /* ignore additional mmap checks for processes that are writable + by the default ACL */ + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, default_role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, task->role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "Set role label for (%s:%d): role:%s, subject:%s\n", task->comm, task->pid, task->role->rolename, task->acl->filename); +#endif + + gr_set_proc_res(task); + + return; +} + +int +gr_set_proc_label(const struct dentry *dentry, const struct vfsmount *mnt) +{ + struct task_struct *task = current; + struct acl_subject_label *newacl; + struct acl_object_label *obj; + __u32 retmode; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + newacl = chk_subj_label(dentry, mnt, task->role); + + task_lock(task); + if (((task->ptrace & PT_PTRACED) && !(task->acl->mode & + GR_POVERRIDE) && (task->acl != newacl) && + !(task->role->roletype & GR_ROLE_GOD) && + !gr_search_file(dentry, GR_PTRACERD, mnt) && + !(task->acl->mode & (GR_LEARN | GR_INHERITLEARN))) || + (atomic_read(&task->fs->count) > 1 || + atomic_read(&task->files->count) > 1 || + atomic_read(&task->sighand->count) > 1)) { + task_unlock(task); + gr_log_fs_generic(GR_DONT_AUDIT, GR_PTRACE_EXEC_ACL_MSG, dentry, mnt); + return -EACCES; + } + task_unlock(task); + + obj = chk_obj_label(dentry, mnt, task->acl); + retmode = obj->mode & (GR_INHERIT | GR_AUDIT_INHERIT); + + if (!(task->acl->mode & GR_INHERITLEARN) && + ((newacl->mode & GR_LEARN) || !(retmode & GR_INHERIT))) { + if (obj->nested) + task->acl = obj->nested; + else + task->acl = newacl; + } else if (retmode & GR_INHERIT && retmode & GR_AUDIT_INHERIT) + gr_log_str_fs(GR_DO_AUDIT, GR_INHERIT_ACL_MSG, task->acl->filename, dentry, mnt); + + task->is_writable = 0; + + /* ignore additional mmap checks for processes that are writable + by the default ACL */ + obj = chk_obj_label(dentry, mnt, default_role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + obj = chk_obj_label(dentry, mnt, task->role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + + gr_set_proc_res(task); + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "Set subject label for (%s:%d): role:%s, subject:%s\n", task->comm, task->pid, task->role->rolename, task->acl->filename); +#endif + return 0; +} + +static void +do_handle_delete(const ino_t ino, const dev_t dev) +{ + struct acl_object_label *matchpo; + struct acl_subject_label *matchps; + struct acl_subject_label *subj; + struct acl_role_label *role; + unsigned int i, x; + + FOR_EACH_ROLE_START(role, i) + FOR_EACH_SUBJECT_START(role, subj, x) + if ((matchpo = lookup_acl_obj_label(ino, dev, subj)) != NULL) + matchpo->mode |= GR_DELETED; + FOR_EACH_SUBJECT_END(subj,x) + FOR_EACH_NESTED_SUBJECT_START(role, subj) + if (subj->inode == ino && subj->device == dev) + subj->mode |= GR_DELETED; + FOR_EACH_NESTED_SUBJECT_END(subj) + if ((matchps = lookup_acl_subj_label(ino, dev, role)) != NULL) + matchps->mode |= GR_DELETED; + FOR_EACH_ROLE_END(role,i) + + return; +} + +void +gr_handle_delete(const ino_t ino, const dev_t dev) +{ + if (unlikely(!(gr_status & GR_READY))) + return; + + write_lock(&gr_inode_lock); + if (unlikely((unsigned long)lookup_inodev_entry(ino, dev))) + do_handle_delete(ino, dev); + write_unlock(&gr_inode_lock); + + return; +} + +static void +update_acl_obj_label(const ino_t oldinode, const dev_t olddevice, + const ino_t newinode, const dev_t newdevice, + struct acl_subject_label *subj) +{ + unsigned int index = fhash(oldinode, olddevice, subj->obj_hash_size); + struct acl_object_label *match; + + match = subj->obj_hash[index]; + + while (match && (match->inode != oldinode || + match->device != olddevice || + !(match->mode & GR_DELETED))) + match = match->next; + + if (match && (match->inode == oldinode) + && (match->device == olddevice) + && (match->mode & GR_DELETED)) { + if (match->prev == NULL) { + subj->obj_hash[index] = match->next; + if (match->next != NULL) + match->next->prev = NULL; + } else { + match->prev->next = match->next; + if (match->next != NULL) + match->next->prev = match->prev; + } + match->prev = NULL; + match->next = NULL; + match->inode = newinode; + match->device = newdevice; + match->mode &= ~GR_DELETED; + + insert_acl_obj_label(match, subj); + } + + return; +} + +static void +update_acl_subj_label(const ino_t oldinode, const dev_t olddevice, + const ino_t newinode, const dev_t newdevice, + struct acl_role_label *role) +{ + unsigned int index = fhash(oldinode, olddevice, role->subj_hash_size); + struct acl_subject_label *match; + + match = role->subj_hash[index]; + + while (match && (match->inode != oldinode || + match->device != olddevice || + !(match->mode & GR_DELETED))) + match = match->next; + + if (match && (match->inode == oldinode) + && (match->device == olddevice) + && (match->mode & GR_DELETED)) { + if (match->prev == NULL) { + role->subj_hash[index] = match->next; + if (match->next != NULL) + match->next->prev = NULL; + } else { + match->prev->next = match->next; + if (match->next != NULL) + match->next->prev = match->prev; + } + match->prev = NULL; + match->next = NULL; + match->inode = newinode; + match->device = newdevice; + match->mode &= ~GR_DELETED; + + insert_acl_subj_label(match, role); + } + + return; +} + +static void +update_inodev_entry(const ino_t oldinode, const dev_t olddevice, + const ino_t newinode, const dev_t newdevice) +{ + unsigned int index = fhash(oldinode, olddevice, inodev_set.i_size); + struct inodev_entry *match; + + match = inodev_set.i_hash[index]; + + while (match && (match->nentry->inode != oldinode || + match->nentry->device != olddevice)) + match = match->next; + + if (match && (match->nentry->inode == oldinode) + && (match->nentry->device == olddevice)) { + if (match->prev == NULL) { + inodev_set.i_hash[index] = match->next; + if (match->next != NULL) + match->next->prev = NULL; + } else { + match->prev->next = match->next; + if (match->next != NULL) + match->next->prev = match->prev; + } + match->prev = NULL; + match->next = NULL; + match->nentry->inode = newinode; + match->nentry->device = newdevice; + + insert_inodev_entry(match); + } + + return; +} + +static void +do_handle_create(const struct name_entry *matchn, const struct dentry *dentry, + const struct vfsmount *mnt) +{ + struct acl_subject_label *subj; + struct acl_role_label *role; + unsigned int i, x; + + FOR_EACH_ROLE_START(role, i) + update_acl_subj_label(matchn->inode, matchn->device, + dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, role); + + FOR_EACH_NESTED_SUBJECT_START(role, subj) + if ((subj->inode == dentry->d_inode->i_ino) && + (subj->device == dentry->d_inode->i_sb->s_dev)) { + subj->inode = dentry->d_inode->i_ino; + subj->device = dentry->d_inode->i_sb->s_dev; + } + FOR_EACH_NESTED_SUBJECT_END(subj) + FOR_EACH_SUBJECT_START(role, subj, x) + update_acl_obj_label(matchn->inode, matchn->device, + dentry->d_inode->i_ino, + dentry->d_inode->i_sb->s_dev, subj); + FOR_EACH_SUBJECT_END(subj,x) + FOR_EACH_ROLE_END(role,i) + + update_inodev_entry(matchn->inode, matchn->device, + dentry->d_inode->i_ino, dentry->d_inode->i_sb->s_dev); + + return; +} + +void +gr_handle_create(const struct dentry *dentry, const struct vfsmount *mnt) +{ + struct name_entry *matchn; + + if (unlikely(!(gr_status & GR_READY))) + return; + + preempt_disable(); + matchn = lookup_name_entry(gr_to_filename_rbac(dentry, mnt)); + + if (unlikely((unsigned long)matchn)) { + write_lock(&gr_inode_lock); + do_handle_create(matchn, dentry, mnt); + write_unlock(&gr_inode_lock); + } + preempt_enable(); + + return; +} + +void +gr_handle_rename(struct inode *old_dir, struct inode *new_dir, + struct dentry *old_dentry, + struct dentry *new_dentry, + struct vfsmount *mnt, const __u8 replace) +{ + struct name_entry *matchn; + + if (unlikely(!(gr_status & GR_READY))) + return; + + preempt_disable(); + matchn = lookup_name_entry(gr_to_filename_rbac(new_dentry, mnt)); + + /* we wouldn't have to check d_inode if it weren't for + NFS silly-renaming + */ + + write_lock(&gr_inode_lock); + if (unlikely(replace && new_dentry->d_inode)) { + if (unlikely(lookup_inodev_entry(new_dentry->d_inode->i_ino, + new_dentry->d_inode->i_sb->s_dev) && + (old_dentry->d_inode->i_nlink <= 1))) + do_handle_delete(new_dentry->d_inode->i_ino, + new_dentry->d_inode->i_sb->s_dev); + } + + if (unlikely(lookup_inodev_entry(old_dentry->d_inode->i_ino, + old_dentry->d_inode->i_sb->s_dev) && + (old_dentry->d_inode->i_nlink <= 1))) + do_handle_delete(old_dentry->d_inode->i_ino, + old_dentry->d_inode->i_sb->s_dev); + + if (unlikely((unsigned long)matchn)) + do_handle_create(matchn, old_dentry, mnt); + + write_unlock(&gr_inode_lock); + preempt_enable(); + + return; +} + +static int +lookup_special_role_auth(__u16 mode, const char *rolename, unsigned char **salt, + unsigned char **sum) +{ + struct acl_role_label *r; + struct role_allowed_ip *ipp; + struct role_transition *trans; + unsigned int i; + int found = 0; + + /* check transition table */ + + for (trans = current->role->transitions; trans; trans = trans->next) { + if (!strcmp(rolename, trans->rolename)) { + found = 1; + break; + } + } + + if (!found) + return 0; + + /* handle special roles that do not require authentication + and check ip */ + + FOR_EACH_ROLE_START(r, i) + if (!strcmp(rolename, r->rolename) && + (r->roletype & GR_ROLE_SPECIAL)) { + found = 0; + if (r->allowed_ips != NULL) { + for (ipp = r->allowed_ips; ipp; ipp = ipp->next) { + if ((ntohl(current->signal->curr_ip) & ipp->netmask) == + (ntohl(ipp->addr) & ipp->netmask)) + found = 1; + } + } else + found = 2; + if (!found) + return 0; + + if (((mode == SPROLE) && (r->roletype & GR_ROLE_NOPW)) || + ((mode == SPROLEPAM) && (r->roletype & GR_ROLE_PAM))) { + *salt = NULL; + *sum = NULL; + return 1; + } + } + FOR_EACH_ROLE_END(r,i) + + for (i = 0; i < num_sprole_pws; i++) { + if (!strcmp(rolename, acl_special_roles[i]->rolename)) { + *salt = acl_special_roles[i]->salt; + *sum = acl_special_roles[i]->sum; + return 1; + } + } + + return 0; +} + +static void +assign_special_role(char *rolename) +{ + struct acl_object_label *obj; + struct acl_role_label *r; + struct acl_role_label *assigned = NULL; + struct task_struct *tsk; + struct file *filp; + unsigned int i; + + FOR_EACH_ROLE_START(r, i) + if (!strcmp(rolename, r->rolename) && + (r->roletype & GR_ROLE_SPECIAL)) + assigned = r; + FOR_EACH_ROLE_END(r,i) + + if (!assigned) + return; + + read_lock(&tasklist_lock); + read_lock(&grsec_exec_file_lock); + + tsk = current->parent; + if (tsk == NULL) + goto out_unlock; + + filp = tsk->exec_file; + if (filp == NULL) + goto out_unlock; + + tsk->is_writable = 0; + + tsk->acl_sp_role = 1; + tsk->acl_role_id = ++acl_sp_role_value; + tsk->role = assigned; + tsk->acl = chk_subj_label(filp->f_dentry, filp->f_vfsmnt, tsk->role); + + /* ignore additional mmap checks for processes that are writable + by the default ACL */ + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, default_role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + tsk->is_writable = 1; + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, tsk->role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + tsk->is_writable = 1; + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "Assigning special role:%s subject:%s to process (%s:%d)\n", tsk->role->rolename, tsk->acl->filename, tsk->comm, tsk->pid); +#endif + +out_unlock: + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + return; +} + +int gr_check_secure_terminal(struct task_struct *task) +{ + struct task_struct *p, *p2, *p3; + struct files_struct *files; + struct fdtable *fdt; + struct file *our_file = NULL, *file; + int i; + + if (task->signal->tty == NULL) + return 1; + + files = get_files_struct(task); + if (files != NULL) { + rcu_read_lock(); + fdt = files_fdtable(files); + for (i=0; i < fdt->max_fds; i++) { + file = fcheck_files(files, i); + if (file && (our_file == NULL) && (file->private_data == task->signal->tty)) { + get_file(file); + our_file = file; + } + } + rcu_read_unlock(); + put_files_struct(files); + } + + if (our_file == NULL) + return 1; + + read_lock(&tasklist_lock); + do_each_thread(p2, p) { + files = get_files_struct(p); + if (files == NULL || + (p->signal && p->signal->tty == task->signal->tty)) { + if (files != NULL) + put_files_struct(files); + continue; + } + rcu_read_lock(); + fdt = files_fdtable(files); + for (i=0; i < fdt->max_fds; i++) { + file = fcheck_files(files, i); + if (file && S_ISCHR(file->f_dentry->d_inode->i_mode) && + file->f_dentry->d_inode->i_rdev == our_file->f_dentry->d_inode->i_rdev) { + p3 = task; + while (p3->pid > 0) { + if (p3 == p) + break; + p3 = p3->parent; + } + if (p3 == p) + break; + gr_log_ttysniff(GR_DONT_AUDIT_GOOD, GR_TTYSNIFF_ACL_MSG, p); + gr_handle_alertkill(p); + rcu_read_unlock(); + put_files_struct(files); + read_unlock(&tasklist_lock); + fput(our_file); + return 0; + } + } + rcu_read_unlock(); + put_files_struct(files); + } while_each_thread(p2, p); + read_unlock(&tasklist_lock); + + fput(our_file); + return 1; +} + +ssize_t +write_grsec_handler(struct file *file, const char * buf, size_t count, loff_t *ppos) +{ + struct gr_arg_wrapper uwrap; + unsigned char *sprole_salt; + unsigned char *sprole_sum; + int error = sizeof (struct gr_arg_wrapper); + int error2 = 0; + + down(&gr_dev_sem); + + if ((gr_status & GR_READY) && !(current->acl->mode & GR_KERNELAUTH)) { + error = -EPERM; + goto out; + } + + if (count != sizeof (struct gr_arg_wrapper)) { + gr_log_int_int(GR_DONT_AUDIT_GOOD, GR_DEV_ACL_MSG, (int)count, (int)sizeof(struct gr_arg_wrapper)); + error = -EINVAL; + goto out; + } + + + if (gr_auth_expires && time_after_eq(get_seconds(), gr_auth_expires)) { + gr_auth_expires = 0; + gr_auth_attempts = 0; + } + + if (copy_from_user(&uwrap, buf, sizeof (struct gr_arg_wrapper))) { + error = -EFAULT; + goto out; + } + + if ((uwrap.version != GRSECURITY_VERSION) || (uwrap.size != sizeof(struct gr_arg))) { + error = -EINVAL; + goto out; + } + + if (copy_from_user(gr_usermode, uwrap.arg, sizeof (struct gr_arg))) { + error = -EFAULT; + goto out; + } + + if (gr_usermode->mode != SPROLE && gr_usermode->mode != SPROLEPAM && + gr_auth_attempts >= CONFIG_GRKERNSEC_ACL_MAXTRIES && + time_after(gr_auth_expires, get_seconds())) { + error = -EBUSY; + goto out; + } + + /* if non-root trying to do anything other than use a special role, + do not attempt authentication, do not count towards authentication + locking + */ + + if (gr_usermode->mode != SPROLE && gr_usermode->mode != STATUS && + gr_usermode->mode != UNSPROLE && gr_usermode->mode != SPROLEPAM && + current->uid) { + error = -EPERM; + goto out; + } + + /* ensure pw and special role name are null terminated */ + + gr_usermode->pw[GR_PW_LEN - 1] = '\0'; + gr_usermode->sp_role[GR_SPROLE_LEN - 1] = '\0'; + + /* Okay. + * We have our enough of the argument structure..(we have yet + * to copy_from_user the tables themselves) . Copy the tables + * only if we need them, i.e. for loading operations. */ + + switch (gr_usermode->mode) { + case STATUS: + if (gr_status & GR_READY) { + error = 1; + if (!gr_check_secure_terminal(current)) + error = 3; + } else + error = 2; + goto out; + case SHUTDOWN: + if ((gr_status & GR_READY) + && !(chkpw(gr_usermode, gr_system_salt, gr_system_sum))) { + gr_status &= ~GR_READY; + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SHUTS_ACL_MSG); + free_variables(); + memset(gr_usermode, 0, sizeof (struct gr_arg)); + memset(gr_system_salt, 0, GR_SALT_LEN); + memset(gr_system_sum, 0, GR_SHA_LEN); + } else if (gr_status & GR_READY) { + gr_log_noargs(GR_DONT_AUDIT, GR_SHUTF_ACL_MSG); + error = -EPERM; + } else { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SHUTI_ACL_MSG); + error = -EAGAIN; + } + break; + case ENABLE: + if (!(gr_status & GR_READY) && !(error2 = gracl_init(gr_usermode))) + gr_log_str(GR_DONT_AUDIT_GOOD, GR_ENABLE_ACL_MSG, GR_VERSION); + else { + if (gr_status & GR_READY) + error = -EAGAIN; + else + error = error2; + gr_log_str(GR_DONT_AUDIT, GR_ENABLEF_ACL_MSG, GR_VERSION); + } + break; + case RELOAD: + if (!(gr_status & GR_READY)) { + gr_log_str(GR_DONT_AUDIT_GOOD, GR_RELOADI_ACL_MSG, GR_VERSION); + error = -EAGAIN; + } else if (!(chkpw(gr_usermode, gr_system_salt, gr_system_sum))) { + lock_kernel(); + gr_status &= ~GR_READY; + free_variables(); + if (!(error2 = gracl_init(gr_usermode))) { + unlock_kernel(); + gr_log_str(GR_DONT_AUDIT_GOOD, GR_RELOAD_ACL_MSG, GR_VERSION); + } else { + unlock_kernel(); + error = error2; + gr_log_str(GR_DONT_AUDIT, GR_RELOADF_ACL_MSG, GR_VERSION); + } + } else { + gr_log_str(GR_DONT_AUDIT, GR_RELOADF_ACL_MSG, GR_VERSION); + error = -EPERM; + } + break; + case SEGVMOD: + if (unlikely(!(gr_status & GR_READY))) { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SEGVMODI_ACL_MSG); + error = -EAGAIN; + break; + } + + if (!(chkpw(gr_usermode, gr_system_salt, gr_system_sum))) { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SEGVMODS_ACL_MSG); + if (gr_usermode->segv_device && gr_usermode->segv_inode) { + struct acl_subject_label *segvacl; + segvacl = + lookup_acl_subj_label(gr_usermode->segv_inode, + gr_usermode->segv_device, + current->role); + if (segvacl) { + segvacl->crashes = 0; + segvacl->expires = 0; + } + } else if (gr_find_uid(gr_usermode->segv_uid) >= 0) { + gr_remove_uid(gr_usermode->segv_uid); + } + } else { + gr_log_noargs(GR_DONT_AUDIT, GR_SEGVMODF_ACL_MSG); + error = -EPERM; + } + break; + case SPROLE: + case SPROLEPAM: + if (unlikely(!(gr_status & GR_READY))) { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SPROLEI_ACL_MSG); + error = -EAGAIN; + break; + } + + if (current->role->expires && time_after_eq(get_seconds(), current->role->expires)) { + current->role->expires = 0; + current->role->auth_attempts = 0; + } + + if (current->role->auth_attempts >= CONFIG_GRKERNSEC_ACL_MAXTRIES && + time_after(current->role->expires, get_seconds())) { + error = -EBUSY; + goto out; + } + + if (lookup_special_role_auth + (gr_usermode->mode, gr_usermode->sp_role, &sprole_salt, &sprole_sum) + && ((!sprole_salt && !sprole_sum) + || !(chkpw(gr_usermode, sprole_salt, sprole_sum)))) { + char *p = ""; + assign_special_role(gr_usermode->sp_role); + read_lock(&tasklist_lock); + if (current->parent) + p = current->parent->role->rolename; + read_unlock(&tasklist_lock); + gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_SPROLES_ACL_MSG, + p, acl_sp_role_value); + } else { + gr_log_str(GR_DONT_AUDIT, GR_SPROLEF_ACL_MSG, gr_usermode->sp_role); + error = -EPERM; + if(!(current->role->auth_attempts++)) + current->role->expires = get_seconds() + CONFIG_GRKERNSEC_ACL_TIMEOUT; + + goto out; + } + break; + case UNSPROLE: + if (unlikely(!(gr_status & GR_READY))) { + gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_UNSPROLEI_ACL_MSG); + error = -EAGAIN; + break; + } + + if (current->role->roletype & GR_ROLE_SPECIAL) { + char *p = ""; + int i = 0; + + read_lock(&tasklist_lock); + if (current->parent) { + p = current->parent->role->rolename; + i = current->parent->acl_role_id; + } + read_unlock(&tasklist_lock); + + gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_UNSPROLES_ACL_MSG, p, i); + gr_set_acls(1); + } else { + gr_log_str(GR_DONT_AUDIT, GR_UNSPROLEF_ACL_MSG, current->role->rolename); + error = -EPERM; + goto out; + } + break; + default: + gr_log_int(GR_DONT_AUDIT, GR_INVMODE_ACL_MSG, gr_usermode->mode); + error = -EINVAL; + break; + } + + if (error != -EPERM) + goto out; + + if(!(gr_auth_attempts++)) + gr_auth_expires = get_seconds() + CONFIG_GRKERNSEC_ACL_TIMEOUT; + + out: + up(&gr_dev_sem); + return error; +} + +int +gr_set_acls(const int type) +{ + struct acl_object_label *obj; + struct task_struct *task, *task2; + struct file *filp; + struct acl_role_label *role = current->role; + __u16 acl_role_id = current->acl_role_id; + + read_lock(&tasklist_lock); + read_lock(&grsec_exec_file_lock); + do_each_thread(task2, task) { + /* check to see if we're called from the exit handler, + if so, only replace ACLs that have inherited the admin + ACL */ + + if (type && (task->role != role || + task->acl_role_id != acl_role_id)) + continue; + + task->acl_role_id = 0; + task->acl_sp_role = 0; + + if ((filp = task->exec_file)) { + task->role = lookup_acl_role_label(task, task->uid, task->gid); + + task->acl = + chk_subj_label(filp->f_dentry, filp->f_vfsmnt, + task->role); + if (task->acl) { + struct acl_subject_label *curr; + curr = task->acl; + + task->is_writable = 0; + /* ignore additional mmap checks for processes that are writable + by the default ACL */ + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, default_role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, task->role->root_label); + if (unlikely(obj->mode & GR_WRITE)) + task->is_writable = 1; + + gr_set_proc_res(task); + +#ifdef CONFIG_GRKERNSEC_ACL_DEBUG + printk(KERN_ALERT "gr_set_acls for (%s:%d): role:%s, subject:%s\n", task->comm, task->pid, task->role->rolename, task->acl->filename); +#endif + } else { + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_DEFACL_MSG, task->comm, task->pid); + return 1; + } + } else { + // it's a kernel process + task->role = kernel_role; + task->acl = kernel_role->root_label; +#ifdef CONFIG_GRKERNSEC_ACL_HIDEKERN + task->acl->mode &= ~GR_PROCFIND; +#endif + } + } while_each_thread(task2, task); + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + return 0; +} + +void +gr_learn_resource(const struct task_struct *task, + const int res, const unsigned long wanted, const int gt) +{ + struct acl_subject_label *acl; + + if (unlikely((gr_status & GR_READY) && + task->acl && (task->acl->mode & (GR_LEARN | GR_INHERITLEARN)))) + goto skip_reslog; + +#ifdef CONFIG_GRKERNSEC_RESLOG + gr_log_resource(task, res, wanted, gt); +#endif + skip_reslog: + + if (unlikely(!(gr_status & GR_READY) || !wanted)) + return; + + acl = task->acl; + + if (likely(!acl || !(acl->mode & (GR_LEARN | GR_INHERITLEARN)) || + !(acl->resmask & (1 << (unsigned short) res)))) + return; + + if (wanted >= acl->res[res].rlim_cur) { + unsigned long res_add; + + res_add = wanted; + switch (res) { + case RLIMIT_CPU: + res_add += GR_RLIM_CPU_BUMP; + break; + case RLIMIT_FSIZE: + res_add += GR_RLIM_FSIZE_BUMP; + break; + case RLIMIT_DATA: + res_add += GR_RLIM_DATA_BUMP; + break; + case RLIMIT_STACK: + res_add += GR_RLIM_STACK_BUMP; + break; + case RLIMIT_CORE: + res_add += GR_RLIM_CORE_BUMP; + break; + case RLIMIT_RSS: + res_add += GR_RLIM_RSS_BUMP; + break; + case RLIMIT_NPROC: + res_add += GR_RLIM_NPROC_BUMP; + break; + case RLIMIT_NOFILE: + res_add += GR_RLIM_NOFILE_BUMP; + break; + case RLIMIT_MEMLOCK: + res_add += GR_RLIM_MEMLOCK_BUMP; + break; + case RLIMIT_AS: + res_add += GR_RLIM_AS_BUMP; + break; + case RLIMIT_LOCKS: + res_add += GR_RLIM_LOCKS_BUMP; + break; + } + + acl->res[res].rlim_cur = res_add; + + if (wanted > acl->res[res].rlim_max) + acl->res[res].rlim_max = res_add; + + security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, + task->role->roletype, acl->filename, + acl->res[res].rlim_cur, acl->res[res].rlim_max, + "", (unsigned long) res); + } + + return; +} + +#ifdef CONFIG_PAX_HAVE_ACL_FLAGS +void +pax_set_initial_flags(struct linux_binprm *bprm) +{ + struct task_struct *task = current; + struct acl_subject_label *proc; + unsigned long flags; + + if (unlikely(!(gr_status & GR_READY))) + return; + + flags = pax_get_flags(task); + + proc = task->acl; + + if (proc->pax_flags & GR_PAX_DISABLE_PAGEEXEC) + flags &= ~MF_PAX_PAGEEXEC; + if (proc->pax_flags & GR_PAX_DISABLE_SEGMEXEC) + flags &= ~MF_PAX_SEGMEXEC; + if (proc->pax_flags & GR_PAX_DISABLE_RANDMMAP) + flags &= ~MF_PAX_RANDMMAP; + if (proc->pax_flags & GR_PAX_DISABLE_EMUTRAMP) + flags &= ~MF_PAX_EMUTRAMP; + if (proc->pax_flags & GR_PAX_DISABLE_MPROTECT) + flags &= ~MF_PAX_MPROTECT; + + if (proc->pax_flags & GR_PAX_ENABLE_PAGEEXEC) + flags |= MF_PAX_PAGEEXEC; + if (proc->pax_flags & GR_PAX_ENABLE_SEGMEXEC) + flags |= MF_PAX_SEGMEXEC; + if (proc->pax_flags & GR_PAX_ENABLE_RANDMMAP) + flags |= MF_PAX_RANDMMAP; + if (proc->pax_flags & GR_PAX_ENABLE_EMUTRAMP) + flags |= MF_PAX_EMUTRAMP; + if (proc->pax_flags & GR_PAX_ENABLE_MPROTECT) + flags |= MF_PAX_MPROTECT; + + pax_set_flags(task, flags); + + return; +} +#endif + +#ifdef CONFIG_SYSCTL +extern struct proc_dir_entry *proc_sys_root; + +/* the following function is called under the BKL */ + +__u32 +gr_handle_sysctl(const struct ctl_table *table, const void *oldval, + const void *newval) +{ + struct proc_dir_entry *tmp; + struct nameidata nd; + const char *proc_sys = "/proc/sys"; + char *path; + struct acl_object_label *obj; + unsigned short len = 0, pos = 0, depth = 0, i; + __u32 err = 0; + __u32 mode = 0; + + if (unlikely(!(gr_status & GR_READY))) + return 1; + + path = per_cpu_ptr(gr_shared_page[0], smp_processor_id()); + + if (oldval) + mode |= GR_READ; + if (newval) + mode |= GR_WRITE; + + /* convert the requested sysctl entry into a pathname */ + + for (tmp = table->de; tmp != proc_sys_root; tmp = tmp->parent) { + len += strlen(tmp->name); + len++; + depth++; + } + + if ((len + depth + strlen(proc_sys) + 1) > PAGE_SIZE) + return 0; /* deny */ + + memset(path, 0, PAGE_SIZE); + + memcpy(path, proc_sys, strlen(proc_sys)); + + pos += strlen(proc_sys); + + for (; depth > 0; depth--) { + path[pos] = '/'; + pos++; + for (i = 1, tmp = table->de; tmp != proc_sys_root; + tmp = tmp->parent) { + if (depth == i) { + memcpy(path + pos, tmp->name, + strlen(tmp->name)); + pos += strlen(tmp->name); + } + i++; + } + } + + err = path_lookup(path, LOOKUP_FOLLOW, &nd); + + if (err) + goto out; + + obj = chk_obj_label(nd.dentry, nd.mnt, current->acl); + err = obj->mode & (mode | to_gr_audit(mode) | GR_SUPPRESS); + + if (unlikely((current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) && + ((err & mode) != mode))) { + __u32 new_mode = mode; + + new_mode &= ~(GR_AUDITS | GR_SUPPRESS); + + err = new_mode; + gr_log_learn(current, nd.dentry, nd.mnt, new_mode); + } else if ((err & mode) != mode && !(err & GR_SUPPRESS)) { + gr_log_str4(GR_DONT_AUDIT, GR_SYSCTL_ACL_MSG, "denied", + path, (mode & GR_READ) ? " reading" : "", + (mode & GR_WRITE) ? " writing" : ""); + err = 0; + } else if ((err & mode) != mode) { + err = 0; + } else if (((err & mode) == mode) && (err & GR_AUDITS)) { + gr_log_str4(GR_DO_AUDIT, GR_SYSCTL_ACL_MSG, "successful", + path, (mode & GR_READ) ? " reading" : "", + (mode & GR_WRITE) ? " writing" : ""); + } + + path_release(&nd); + + out: + return err; +} +#endif + +int +gr_handle_proc_ptrace(struct task_struct *task) +{ + struct file *filp; + struct task_struct *tmp = task; + struct task_struct *curtemp = current; + __u32 retmode; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + read_lock(&tasklist_lock); + read_lock(&grsec_exec_file_lock); + filp = task->exec_file; + + while (tmp->pid > 0) { + if (tmp == curtemp) + break; + tmp = tmp->parent; + } + + if (!filp || (tmp->pid == 0 && !(current->acl->mode & GR_RELAXPTRACE))) { + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + return 1; + } + + retmode = gr_search_file(filp->f_dentry, GR_NOPTRACE, filp->f_vfsmnt); + read_unlock(&grsec_exec_file_lock); + read_unlock(&tasklist_lock); + + if (retmode & GR_NOPTRACE) + return 1; + + if (!(current->acl->mode & GR_POVERRIDE) && !(current->role->roletype & GR_ROLE_GOD) + && (current->acl != task->acl || (current->acl != current->role->root_label + && current->pid != task->pid))) + return 1; + + return 0; +} + +int +gr_handle_ptrace(struct task_struct *task, const long request) +{ + struct task_struct *tmp = task; + struct task_struct *curtemp = current; + __u32 retmode; + + if (unlikely(!(gr_status & GR_READY))) + return 0; + + read_lock(&tasklist_lock); + while (tmp->pid > 0) { + if (tmp == curtemp) + break; + tmp = tmp->parent; + } + + if (tmp->pid == 0 && !(current->acl->mode & GR_RELAXPTRACE)) { + read_unlock(&tasklist_lock); + gr_log_ptrace(GR_DONT_AUDIT, GR_PTRACE_ACL_MSG, task); + return 1; + } + read_unlock(&tasklist_lock); + + read_lock(&grsec_exec_file_lock); + if (unlikely(!task->exec_file)) { + read_unlock(&grsec_exec_file_lock); + return 0; + } + + retmode = gr_search_file(task->exec_file->f_dentry, GR_PTRACERD | GR_NOPTRACE, task->exec_file->f_vfsmnt); + read_unlock(&grsec_exec_file_lock); + + if (retmode & GR_NOPTRACE) { + gr_log_ptrace(GR_DONT_AUDIT, GR_PTRACE_ACL_MSG, task); + return 1; + } + + if (retmode & GR_PTRACERD) { + switch (request) { + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + case PTRACE_POKEUSR: +#if !defined(CONFIG_PPC32) && !defined(CONFIG_PPC64) && !defined(CONFIG_PARISC) && !defined(CONFIG_ALPHA) && !defined(CONFIG_IA64) + case PTRACE_SETREGS: + case PTRACE_SETFPREGS: +#endif +#ifdef CONFIG_X86 + case PTRACE_SETFPXREGS: +#endif +#ifdef CONFIG_ALTIVEC + case PTRACE_SETVRREGS: +#endif + return 1; + default: + return 0; + } + } else if (!(current->acl->mode & GR_POVERRIDE) && + !(current->role->roletype & GR_ROLE_GOD) && + (current->acl != task->acl)) { + gr_log_ptrace(GR_DONT_AUDIT, GR_PTRACE_ACL_MSG, task); + return 1; + } + + return 0; +} + +static int is_writable_mmap(const struct file *filp) +{ + struct task_struct *task = current; + struct acl_object_label *obj, *obj2; + + if (gr_status & GR_READY && !(task->acl->mode & GR_OVERRIDE) && + !task->is_writable && S_ISREG(filp->f_dentry->d_inode->i_mode)) { + obj = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, default_role->root_label); + obj2 = chk_obj_label(filp->f_dentry, filp->f_vfsmnt, + task->role->root_label); + if (unlikely((obj->mode & GR_WRITE) || (obj2->mode & GR_WRITE))) { + gr_log_fs_generic(GR_DONT_AUDIT, GR_WRITLIB_ACL_MSG, filp->f_dentry, filp->f_vfsmnt); + return 1; + } + } + return 0; +} + +int +gr_acl_handle_mmap(const struct file *file, const unsigned long prot) +{ + __u32 mode; + + if (unlikely(!file || !(prot & PROT_EXEC))) + return 1; + + if (is_writable_mmap(file)) + return 0; + + mode = + gr_search_file(file->f_dentry, + GR_EXEC | GR_AUDIT_EXEC | GR_SUPPRESS, + file->f_vfsmnt); + + if (!gr_tpe_allow(file)) + return 0; + + if (unlikely(!(mode & GR_EXEC) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, GR_MMAP_ACL_MSG, file->f_dentry, file->f_vfsmnt); + return 0; + } else if (unlikely(!(mode & GR_EXEC))) { + return 0; + } else if (unlikely(mode & GR_EXEC && mode & GR_AUDIT_EXEC)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, GR_MMAP_ACL_MSG, file->f_dentry, file->f_vfsmnt); + return 1; + } + + return 1; +} + +int +gr_acl_handle_mprotect(const struct file *file, const unsigned long prot) +{ + __u32 mode; + + if (unlikely(!file || !(prot & PROT_EXEC))) + return 1; + + if (is_writable_mmap(file)) + return 0; + + mode = + gr_search_file(file->f_dentry, + GR_EXEC | GR_AUDIT_EXEC | GR_SUPPRESS, + file->f_vfsmnt); + + if (!gr_tpe_allow(file)) + return 0; + + if (unlikely(!(mode & GR_EXEC) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, GR_MPROTECT_ACL_MSG, file->f_dentry, file->f_vfsmnt); + return 0; + } else if (unlikely(!(mode & GR_EXEC))) { + return 0; + } else if (unlikely(mode & GR_EXEC && mode & GR_AUDIT_EXEC)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, GR_MPROTECT_ACL_MSG, file->f_dentry, file->f_vfsmnt); + return 1; + } + + return 1; +} + +void +gr_acl_handle_psacct(struct task_struct *task, const long code) +{ + unsigned long runtime; + unsigned long cputime; + unsigned int wday, cday; + __u8 whr, chr; + __u8 wmin, cmin; + __u8 wsec, csec; + + if (unlikely(!(gr_status & GR_READY) || !task->acl || + !(task->acl->mode & GR_PROCACCT))) + return; + + runtime = xtime.tv_sec - task->start_time.tv_sec; + wday = runtime / (3600 * 24); + runtime -= wday * (3600 * 24); + whr = runtime / 3600; + runtime -= whr * 3600; + wmin = runtime / 60; + runtime -= wmin * 60; + wsec = runtime; + + cputime = (task->utime + task->stime) / HZ; + cday = cputime / (3600 * 24); + cputime -= cday * (3600 * 24); + chr = cputime / 3600; + cputime -= chr * 3600; + cmin = cputime / 60; + cputime -= cmin * 60; + csec = cputime; + + gr_log_procacct(GR_DO_AUDIT, GR_ACL_PROCACCT_MSG, task, wday, whr, wmin, wsec, cday, chr, cmin, csec, code); + + return; +} + +void gr_set_kernel_label(struct task_struct *task) +{ + if (gr_status & GR_READY) { + task->role = kernel_role; + task->acl = kernel_role->root_label; + } + return; +} + +int gr_acl_handle_filldir(const struct file *file, const char *name, const unsigned int namelen, const ino_t ino) +{ + struct task_struct *task = current; + struct dentry *dentry = file->f_dentry; + struct vfsmount *mnt = file->f_vfsmnt; + struct acl_object_label *obj, *tmp; + struct acl_subject_label *subj; + unsigned int bufsize; + int is_not_root; + char *path; + + if (unlikely(!(gr_status & GR_READY))) + return 1; + + if (task->acl->mode & (GR_LEARN | GR_INHERITLEARN)) + return 1; + + subj = task->acl; + do { + obj = lookup_acl_obj_label(ino, dentry->d_inode->i_sb->s_dev, subj); + if (obj != NULL) + return (obj->mode & GR_FIND) ? 1 : 0; + } while ((subj = subj->parent_subject)); + + obj = chk_obj_label(dentry, mnt, task->acl); + if (obj->globbed == NULL) + return (obj->mode & GR_FIND) ? 1 : 0; + + is_not_root = ((obj->filename[0] == '/') && + (obj->filename[1] == '\0')) ? 0 : 1; + bufsize = PAGE_SIZE - namelen - is_not_root; + + /* check bufsize > PAGE_SIZE || bufsize == 0 */ + if (unlikely((bufsize - 1) > (PAGE_SIZE - 1))) + return 1; + + preempt_disable(); + path = d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0], smp_processor_id()), + bufsize); + + bufsize = strlen(path); + + /* if base is "/", don't append an additional slash */ + if (is_not_root) + *(path + bufsize) = '/'; + memcpy(path + bufsize + is_not_root, name, namelen); + *(path + bufsize + namelen + is_not_root) = '\0'; + + tmp = obj->globbed; + while (tmp) { + if (!glob_match(tmp->filename, path)) { + preempt_enable(); + return (tmp->mode & GR_FIND) ? 1 : 0; + } + tmp = tmp->next; + } + preempt_enable(); + return (obj->mode & GR_FIND) ? 1 : 0; +} + +EXPORT_SYMBOL(gr_learn_resource); +EXPORT_SYMBOL(gr_set_kernel_label); +#ifdef CONFIG_SECURITY +EXPORT_SYMBOL(gr_check_user_change); +EXPORT_SYMBOL(gr_check_group_change); +#endif + diff -Naur linux-2.6.19.2.orig/grsecurity/gracl_alloc.c linux-2.6.19.2/grsecurity/gracl_alloc.c --- linux-2.6.19.2.orig/grsecurity/gracl_alloc.c 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/gracl_alloc.c 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include + +static unsigned long alloc_stack_next = 1; +static unsigned long alloc_stack_size = 1; +static void **alloc_stack; + +static __inline__ int +alloc_pop(void) +{ + if (alloc_stack_next == 1) + return 0; + + kfree(alloc_stack[alloc_stack_next - 2]); + + alloc_stack_next--; + + return 1; +} + +static __inline__ void +alloc_push(void *buf) +{ + if (alloc_stack_next >= alloc_stack_size) + BUG(); + + alloc_stack[alloc_stack_next - 1] = buf; + + alloc_stack_next++; + + return; +} + +void * +acl_alloc(unsigned long len) +{ + void *ret; + + if (len > PAGE_SIZE) + BUG(); + + ret = kmalloc(len, GFP_KERNEL); + + if (ret) + alloc_push(ret); + + return ret; +} + +void +acl_free_all(void) +{ + if (gr_acl_is_enabled() || !alloc_stack) + return; + + while (alloc_pop()) ; + + if (alloc_stack) { + if ((alloc_stack_size * sizeof (void *)) <= PAGE_SIZE) + kfree(alloc_stack); + else + vfree(alloc_stack); + } + + alloc_stack = NULL; + alloc_stack_size = 1; + alloc_stack_next = 1; + + return; +} + +int +acl_alloc_stack_init(unsigned long size) +{ + if ((size * sizeof (void *)) <= PAGE_SIZE) + alloc_stack = + (void **) kmalloc(size * sizeof (void *), GFP_KERNEL); + else + alloc_stack = (void **) vmalloc(size * sizeof (void *)); + + alloc_stack_size = size; + + if (!alloc_stack) + return 0; + else + return 1; +} diff -Naur linux-2.6.19.2.orig/grsecurity/gracl_cap.c linux-2.6.19.2/grsecurity/gracl_cap.c --- linux-2.6.19.2.orig/grsecurity/gracl_cap.c 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/gracl_cap.c 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,110 @@ +#include +#include +#include +#include +#include +#include +#include + +static const char *captab_log[] = { + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_DAC_READ_SEARCH", + "CAP_FOWNER", + "CAP_FSETID", + "CAP_KILL", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETPCAP", + "CAP_LINUX_IMMUTABLE", + "CAP_NET_BIND_SERVICE", + "CAP_NET_BROADCAST", + "CAP_NET_ADMIN", + "CAP_NET_RAW", + "CAP_IPC_LOCK", + "CAP_IPC_OWNER", + "CAP_SYS_MODULE", + "CAP_SYS_RAWIO", + "CAP_SYS_CHROOT", + "CAP_SYS_PTRACE", + "CAP_SYS_PACCT", + "CAP_SYS_ADMIN", + "CAP_SYS_BOOT", + "CAP_SYS_NICE", + "CAP_SYS_RESOURCE", + "CAP_SYS_TIME", + "CAP_SYS_TTY_CONFIG", + "CAP_MKNOD", + "CAP_LEASE" +}; + +EXPORT_SYMBOL(gr_task_is_capable); + +int +gr_task_is_capable(struct task_struct *task, const int cap) +{ + struct acl_subject_label *curracl; + __u32 cap_drop = 0, cap_mask = 0; + + if (!gr_acl_is_enabled()) + return 1; + + curracl = task->acl; + + cap_drop = curracl->cap_lower; + cap_mask = curracl->cap_mask; + + while ((curracl = curracl->parent_subject)) { + if (!(cap_mask & (1 << cap)) && (curracl->cap_mask & (1 << cap))) + cap_drop |= curracl->cap_lower & (1 << cap); + cap_mask |= curracl->cap_mask; + } + + if (!cap_raised(cap_drop, cap)) + return 1; + + curracl = task->acl; + + if ((curracl->mode & (GR_LEARN | GR_INHERITLEARN)) + && cap_raised(task->cap_effective, cap)) { + security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, + task->role->roletype, task->uid, + task->gid, task->exec_file ? + gr_to_filename(task->exec_file->f_dentry, + task->exec_file->f_vfsmnt) : curracl->filename, + curracl->filename, 0UL, + 0UL, "", (unsigned long) cap, NIPQUAD(task->signal->curr_ip)); + return 1; + } + + if ((cap >= 0) && (cap < (sizeof(captab_log)/sizeof(captab_log[0]))) && cap_raised(task->cap_effective, cap)) + gr_log_cap(GR_DONT_AUDIT, GR_CAP_ACL_MSG, task, captab_log[cap]); + + return 0; +} + +int +gr_is_capable_nolog(const int cap) +{ + struct acl_subject_label *curracl; + __u32 cap_drop = 0, cap_mask = 0; + + if (!gr_acl_is_enabled()) + return 1; + + curracl = current->acl; + + cap_drop = curracl->cap_lower; + cap_mask = curracl->cap_mask; + + while ((curracl = curracl->parent_subject)) { + cap_drop |= curracl->cap_lower & (cap_mask & ~curracl->cap_mask); + cap_mask |= curracl->cap_mask; + } + + if (!cap_raised(cap_drop, cap)) + return 1; + + return 0; +} + diff -Naur linux-2.6.19.2.orig/grsecurity/gracl_fs.c linux-2.6.19.2/grsecurity/gracl_fs.c --- linux-2.6.19.2.orig/grsecurity/gracl_fs.c 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/gracl_fs.c 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,423 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__u32 +gr_acl_handle_hidden_file(const struct dentry * dentry, + const struct vfsmount * mnt) +{ + __u32 mode; + + if (unlikely(!dentry->d_inode)) + return GR_FIND; + + mode = + gr_search_file(dentry, GR_FIND | GR_AUDIT_FIND | GR_SUPPRESS, mnt); + + if (unlikely(mode & GR_FIND && mode & GR_AUDIT_FIND)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, GR_HIDDEN_ACL_MSG, dentry, mnt); + return mode; + } else if (unlikely(!(mode & GR_FIND) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, GR_HIDDEN_ACL_MSG, dentry, mnt); + return 0; + } else if (unlikely(!(mode & GR_FIND))) + return 0; + + return GR_FIND; +} + +__u32 +gr_acl_handle_open(const struct dentry * dentry, const struct vfsmount * mnt, + const int fmode) +{ + __u32 reqmode = GR_FIND; + __u32 mode; + + if (unlikely(!dentry->d_inode)) + return reqmode; + + if (unlikely(fmode & O_APPEND)) + reqmode |= GR_APPEND; + else if (unlikely(fmode & FMODE_WRITE)) + reqmode |= GR_WRITE; + if (likely((fmode & FMODE_READ) && !(fmode & O_DIRECTORY))) + reqmode |= GR_READ; + + mode = + gr_search_file(dentry, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS, + mnt); + + if (unlikely(((mode & reqmode) == reqmode) && mode & GR_AUDITS)) { + gr_log_fs_rbac_mode2(GR_DO_AUDIT, GR_OPEN_ACL_MSG, dentry, mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : reqmode & + GR_APPEND ? " appending" : ""); + return reqmode; + } else + if (unlikely((mode & reqmode) != reqmode && !(mode & GR_SUPPRESS))) + { + gr_log_fs_rbac_mode2(GR_DONT_AUDIT, GR_OPEN_ACL_MSG, dentry, mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : reqmode & + GR_APPEND ? " appending" : ""); + return 0; + } else if (unlikely((mode & reqmode) != reqmode)) + return 0; + + return reqmode; +} + +__u32 +gr_acl_handle_creat(const struct dentry * dentry, + const struct dentry * p_dentry, + const struct vfsmount * p_mnt, const int fmode, + const int imode) +{ + __u32 reqmode = GR_WRITE | GR_CREATE; + __u32 mode; + + if (unlikely(fmode & O_APPEND)) + reqmode |= GR_APPEND; + if (unlikely((fmode & FMODE_READ) && !(fmode & O_DIRECTORY))) + reqmode |= GR_READ; + if (unlikely((fmode & O_CREAT) && (imode & (S_ISUID | S_ISGID)))) + reqmode |= GR_SETID; + + mode = + gr_check_create(dentry, p_dentry, p_mnt, + reqmode | to_gr_audit(reqmode) | GR_SUPPRESS); + + if (unlikely(((mode & reqmode) == reqmode) && mode & GR_AUDITS)) { + gr_log_fs_rbac_mode2(GR_DO_AUDIT, GR_CREATE_ACL_MSG, dentry, p_mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : reqmode & + GR_APPEND ? " appending" : ""); + return reqmode; + } else + if (unlikely((mode & reqmode) != reqmode && !(mode & GR_SUPPRESS))) + { + gr_log_fs_rbac_mode2(GR_DONT_AUDIT, GR_CREATE_ACL_MSG, dentry, p_mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : reqmode & + GR_APPEND ? " appending" : ""); + return 0; + } else if (unlikely((mode & reqmode) != reqmode)) + return 0; + + return reqmode; +} + +__u32 +gr_acl_handle_access(const struct dentry * dentry, const struct vfsmount * mnt, + const int fmode) +{ + __u32 mode, reqmode = GR_FIND; + + if ((fmode & S_IXOTH) && !S_ISDIR(dentry->d_inode->i_mode)) + reqmode |= GR_EXEC; + if (fmode & S_IWOTH) + reqmode |= GR_WRITE; + if (fmode & S_IROTH) + reqmode |= GR_READ; + + mode = + gr_search_file(dentry, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS, + mnt); + + if (unlikely(((mode & reqmode) == reqmode) && mode & GR_AUDITS)) { + gr_log_fs_rbac_mode3(GR_DO_AUDIT, GR_ACCESS_ACL_MSG, dentry, mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : "", + reqmode & GR_EXEC ? " executing" : ""); + return reqmode; + } else + if (unlikely((mode & reqmode) != reqmode && !(mode & GR_SUPPRESS))) + { + gr_log_fs_rbac_mode3(GR_DONT_AUDIT, GR_ACCESS_ACL_MSG, dentry, mnt, + reqmode & GR_READ ? " reading" : "", + reqmode & GR_WRITE ? " writing" : "", + reqmode & GR_EXEC ? " executing" : ""); + return 0; + } else if (unlikely((mode & reqmode) != reqmode)) + return 0; + + return reqmode; +} + +static __u32 generic_fs_handler(const struct dentry *dentry, const struct vfsmount *mnt, __u32 reqmode, const char *fmt) +{ + __u32 mode; + + mode = gr_search_file(dentry, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS, mnt); + + if (unlikely(((mode & (reqmode)) == (reqmode)) && mode & GR_AUDITS)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, fmt, dentry, mnt); + return mode; + } else if (unlikely((mode & (reqmode)) != (reqmode) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, fmt, dentry, mnt); + return 0; + } else if (unlikely((mode & (reqmode)) != (reqmode))) + return 0; + + return (reqmode); +} + +__u32 +gr_acl_handle_rmdir(const struct dentry * dentry, const struct vfsmount * mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE | GR_DELETE , GR_RMDIR_ACL_MSG); +} + +__u32 +gr_acl_handle_unlink(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE | GR_DELETE , GR_UNLINK_ACL_MSG); +} + +__u32 +gr_acl_handle_truncate(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_TRUNCATE_ACL_MSG); +} + +__u32 +gr_acl_handle_utime(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_ATIME_ACL_MSG); +} + +__u32 +gr_acl_handle_fchmod(const struct dentry *dentry, const struct vfsmount *mnt, + mode_t mode) +{ + if (unlikely(dentry->d_inode && S_ISSOCK(dentry->d_inode->i_mode))) + return 1; + + if (unlikely((mode != (mode_t)-1) && (mode & (S_ISUID | S_ISGID)))) { + return generic_fs_handler(dentry, mnt, GR_WRITE | GR_SETID, + GR_FCHMOD_ACL_MSG); + } else { + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_FCHMOD_ACL_MSG); + } +} + +__u32 +gr_acl_handle_chmod(const struct dentry *dentry, const struct vfsmount *mnt, + mode_t mode) +{ + if (unlikely((mode != (mode_t)-1) && (mode & (S_ISUID | S_ISGID)))) { + return generic_fs_handler(dentry, mnt, GR_WRITE | GR_SETID, + GR_CHMOD_ACL_MSG); + } else { + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_CHMOD_ACL_MSG); + } +} + +__u32 +gr_acl_handle_chown(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_WRITE, GR_CHOWN_ACL_MSG); +} + +__u32 +gr_acl_handle_execve(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_EXEC, GR_EXEC_ACL_MSG); +} + +__u32 +gr_acl_handle_unix(const struct dentry *dentry, const struct vfsmount *mnt) +{ + return generic_fs_handler(dentry, mnt, GR_READ | GR_WRITE, + GR_UNIXCONNECT_ACL_MSG); +} + +/* hardlinks require at minimum create permission, + any additional privilege required is based on the + privilege of the file being linked to +*/ +__u32 +gr_acl_handle_link(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, + const struct dentry * old_dentry, + const struct vfsmount * old_mnt, const char *to) +{ + __u32 mode; + __u32 needmode = GR_CREATE | GR_LINK; + __u32 needaudit = GR_AUDIT_CREATE | GR_AUDIT_LINK; + + mode = + gr_check_link(new_dentry, parent_dentry, parent_mnt, old_dentry, + old_mnt); + + if (unlikely(((mode & needmode) == needmode) && (mode & needaudit))) { + gr_log_fs_rbac_str(GR_DO_AUDIT, GR_LINK_ACL_MSG, old_dentry, old_mnt, to); + return mode; + } else if (unlikely(((mode & needmode) != needmode) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_str(GR_DONT_AUDIT, GR_LINK_ACL_MSG, old_dentry, old_mnt, to); + return 0; + } else if (unlikely((mode & needmode) != needmode)) + return 0; + + return 1; +} + +__u32 +gr_acl_handle_symlink(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, const char *from) +{ + __u32 needmode = GR_WRITE | GR_CREATE; + __u32 mode; + + mode = + gr_check_create(new_dentry, parent_dentry, parent_mnt, + GR_CREATE | GR_AUDIT_CREATE | + GR_WRITE | GR_AUDIT_WRITE | GR_SUPPRESS); + + if (unlikely(mode & GR_WRITE && mode & GR_AUDITS)) { + gr_log_fs_str_rbac(GR_DO_AUDIT, GR_SYMLINK_ACL_MSG, from, new_dentry, parent_mnt); + return mode; + } else if (unlikely(((mode & needmode) != needmode) && !(mode & GR_SUPPRESS))) { + gr_log_fs_str_rbac(GR_DONT_AUDIT, GR_SYMLINK_ACL_MSG, from, new_dentry, parent_mnt); + return 0; + } else if (unlikely((mode & needmode) != needmode)) + return 0; + + return (GR_WRITE | GR_CREATE); +} + +static __u32 generic_fs_create_handler(const struct dentry *new_dentry, const struct dentry *parent_dentry, const struct vfsmount *parent_mnt, __u32 reqmode, const char *fmt) +{ + __u32 mode; + + mode = gr_check_create(new_dentry, parent_dentry, parent_mnt, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS); + + if (unlikely(((mode & (reqmode)) == (reqmode)) && mode & GR_AUDITS)) { + gr_log_fs_rbac_generic(GR_DO_AUDIT, fmt, new_dentry, parent_mnt); + return mode; + } else if (unlikely((mode & (reqmode)) != (reqmode) && !(mode & GR_SUPPRESS))) { + gr_log_fs_rbac_generic(GR_DONT_AUDIT, fmt, new_dentry, parent_mnt); + return 0; + } else if (unlikely((mode & (reqmode)) != (reqmode))) + return 0; + + return (reqmode); +} + +__u32 +gr_acl_handle_mknod(const struct dentry * new_dentry, + const struct dentry * parent_dentry, + const struct vfsmount * parent_mnt, + const int mode) +{ + __u32 reqmode = GR_WRITE | GR_CREATE; + if (unlikely(mode & (S_ISUID | S_ISGID))) + reqmode |= GR_SETID; + + return generic_fs_create_handler(new_dentry, parent_dentry, parent_mnt, + reqmode, GR_MKNOD_ACL_MSG); +} + +__u32 +gr_acl_handle_mkdir(const struct dentry *new_dentry, + const struct dentry *parent_dentry, + const struct vfsmount *parent_mnt) +{ + return generic_fs_create_handler(new_dentry, parent_dentry, parent_mnt, + GR_WRITE | GR_CREATE, GR_MKDIR_ACL_MSG); +} + +#define RENAME_CHECK_SUCCESS(old, new) \ + (((old & (GR_WRITE | GR_READ)) == (GR_WRITE | GR_READ)) && \ + ((new & (GR_WRITE | GR_READ)) == (GR_WRITE | GR_READ))) + +int +gr_acl_handle_rename(struct dentry *new_dentry, + struct dentry *parent_dentry, + const struct vfsmount *parent_mnt, + struct dentry *old_dentry, + struct inode *old_parent_inode, + struct vfsmount *old_mnt, const char *newname) +{ + __u32 comp1, comp2; + int error = 0; + + if (unlikely(!gr_acl_is_enabled())) + return 0; + + if (!new_dentry->d_inode) { + comp1 = gr_check_create(new_dentry, parent_dentry, parent_mnt, + GR_READ | GR_WRITE | GR_CREATE | GR_AUDIT_READ | + GR_AUDIT_WRITE | GR_AUDIT_CREATE | GR_SUPPRESS); + comp2 = gr_search_file(old_dentry, GR_READ | GR_WRITE | + GR_DELETE | GR_AUDIT_DELETE | + GR_AUDIT_READ | GR_AUDIT_WRITE | + GR_SUPPRESS, old_mnt); + } else { + comp1 = gr_search_file(new_dentry, GR_READ | GR_WRITE | + GR_CREATE | GR_DELETE | + GR_AUDIT_CREATE | GR_AUDIT_DELETE | + GR_AUDIT_READ | GR_AUDIT_WRITE | + GR_SUPPRESS, parent_mnt); + comp2 = + gr_search_file(old_dentry, + GR_READ | GR_WRITE | GR_AUDIT_READ | + GR_DELETE | GR_AUDIT_DELETE | + GR_AUDIT_WRITE | GR_SUPPRESS, old_mnt); + } + + if (RENAME_CHECK_SUCCESS(comp1, comp2) && + ((comp1 & GR_AUDITS) || (comp2 & GR_AUDITS))) + gr_log_fs_rbac_str(GR_DO_AUDIT, GR_RENAME_ACL_MSG, old_dentry, old_mnt, newname); + else if (!RENAME_CHECK_SUCCESS(comp1, comp2) && !(comp1 & GR_SUPPRESS) + && !(comp2 & GR_SUPPRESS)) { + gr_log_fs_rbac_str(GR_DONT_AUDIT, GR_RENAME_ACL_MSG, old_dentry, old_mnt, newname); + error = -EACCES; + } else if (unlikely(!RENAME_CHECK_SUCCESS(comp1, comp2))) + error = -EACCES; + + return error; +} + +void +gr_acl_handle_exit(void) +{ + u16 id; + char *rolename; + struct file *exec_file; + + if (unlikely(current->acl_sp_role && gr_acl_is_enabled())) { + id = current->acl_role_id; + rolename = current->role->rolename; + gr_set_acls(1); + gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_SPROLEL_ACL_MSG, rolename, id); + } + + write_lock(&grsec_exec_file_lock); + exec_file = current->exec_file; + current->exec_file = NULL; + write_unlock(&grsec_exec_file_lock); + + if (exec_file) + fput(exec_file); +} + +int +gr_acl_handle_procpidmem(const struct task_struct *task) +{ + if (unlikely(!gr_acl_is_enabled())) + return 0; + + if (task->acl->mode & GR_PROTPROCFD) + return -EACCES; + + return 0; +} diff -Naur linux-2.6.19.2.orig/grsecurity/gracl_ip.c linux-2.6.19.2/grsecurity/gracl_ip.c --- linux-2.6.19.2.orig/grsecurity/gracl_ip.c 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/gracl_ip.c 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GR_BIND 0x01 +#define GR_CONNECT 0x02 +#define GR_INVERT 0x04 + +static const char * gr_protocols[256] = { + "ip", "icmp", "igmp", "ggp", "ipencap", "st", "tcp", "cbt", + "egp", "igp", "bbn-rcc", "nvp", "pup", "argus", "emcon", "xnet", + "chaos", "udp", "mux", "dcn", "hmp", "prm", "xns-idp", "trunk-1", + "trunk-2", "leaf-1", "leaf-2", "rdp", "irtp", "iso-tp4", "netblt", "mfe-nsp", + "merit-inp", "sep", "3pc", "idpr", "xtp", "ddp", "idpr-cmtp", "tp++", + "il", "ipv6", "sdrp", "ipv6-route", "ipv6-frag", "idrp", "rsvp", "gre", + "mhrp", "bna", "ipv6-crypt", "ipv6-auth", "i-nlsp", "swipe", "narp", "mobile", + "tlsp", "skip", "ipv6-icmp", "ipv6-nonxt", "ipv6-opts", "unknown:61", "cftp", "unknown:63", + "sat-expak", "kryptolan", "rvd", "ippc", "unknown:68", "sat-mon", "visa", "ipcv", + "cpnx", "cphb", "wsn", "pvp", "br-sat-mon", "sun-nd", "wb-mon", "wb-expak", + "iso-ip", "vmtp", "secure-vmtp", "vines", "ttp", "nfsnet-igp", "dgp", "tcf", + "eigrp", "ospf", "sprite-rpc", "larp", "mtp", "ax.25", "ipip", "micp", + "scc-sp", "etherip", "encap", "unknown:99", "gmtp", "ifmp", "pnni", "pim", + "aris", "scps", "qnx", "a/n", "ipcomp", "snp", "compaq-peer", "ipx-in-ip", + "vrrp", "pgm", "unknown:114", "l2tp", "ddx", "iatp", "stp", "srp", + "uti", "smp", "sm", "ptp", "isis", "fire", "crtp", "crdup", + "sscopmce", "iplt", "sps", "pipe", "sctp", "fc", "unkown:134", "unknown:135", + "unknown:136", "unknown:137", "unknown:138", "unknown:139", "unknown:140", "unknown:141", "unknown:142", "unknown:143", + "unknown:144", "unknown:145", "unknown:146", "unknown:147", "unknown:148", "unknown:149", "unknown:150", "unknown:151", + "unknown:152", "unknown:153", "unknown:154", "unknown:155", "unknown:156", "unknown:157", "unknown:158", "unknown:159", + "unknown:160", "unknown:161", "unknown:162", "unknown:163", "unknown:164", "unknown:165", "unknown:166", "unknown:167", + "unknown:168", "unknown:169", "unknown:170", "unknown:171", "unknown:172", "unknown:173", "unknown:174", "unknown:175", + "unknown:176", "unknown:177", "unknown:178", "unknown:179", "unknown:180", "unknown:181", "unknown:182", "unknown:183", + "unknown:184", "unknown:185", "unknown:186", "unknown:187", "unknown:188", "unknown:189", "unknown:190", "unknown:191", + "unknown:192", "unknown:193", "unknown:194", "unknown:195", "unknown:196", "unknown:197", "unknown:198", "unknown:199", + "unknown:200", "unknown:201", "unknown:202", "unknown:203", "unknown:204", "unknown:205", "unknown:206", "unknown:207", + "unknown:208", "unknown:209", "unknown:210", "unknown:211", "unknown:212", "unknown:213", "unknown:214", "unknown:215", + "unknown:216", "unknown:217", "unknown:218", "unknown:219", "unknown:220", "unknown:221", "unknown:222", "unknown:223", + "unknown:224", "unknown:225", "unknown:226", "unknown:227", "unknown:228", "unknown:229", "unknown:230", "unknown:231", + "unknown:232", "unknown:233", "unknown:234", "unknown:235", "unknown:236", "unknown:237", "unknown:238", "unknown:239", + "unknown:240", "unknown:241", "unknown:242", "unknown:243", "unknown:244", "unknown:245", "unknown:246", "unknown:247", + "unknown:248", "unknown:249", "unknown:250", "unknown:251", "unknown:252", "unknown:253", "unknown:254", "unknown:255", + }; + +static const char * gr_socktypes[11] = { + "unknown:0", "stream", "dgram", "raw", "rdm", "seqpacket", "unknown:6", + "unknown:7", "unknown:8", "unknown:9", "packet" + }; + +const char * +gr_proto_to_name(unsigned char proto) +{ + return gr_protocols[proto]; +} + +const char * +gr_socktype_to_name(unsigned char type) +{ + return gr_socktypes[type]; +} + +int +gr_search_socket(const int domain, const int type, const int protocol) +{ + struct acl_subject_label *curr; + + if (unlikely(!gr_acl_is_enabled())) + goto exit; + + if ((domain < 0) || (type < 0) || (protocol < 0) || (domain != PF_INET) + || (domain >= NPROTO) || (type >= SOCK_MAX) || (protocol > 255)) + goto exit; // let the kernel handle it + + curr = current->acl; + + if (!curr->ips) + goto exit; + + if ((curr->ip_type & (1 << type)) && + (curr->ip_proto[protocol / 32] & (1 << (protocol % 32)))) + goto exit; + + if (curr->mode & (GR_LEARN | GR_INHERITLEARN)) { + /* we don't place acls on raw sockets , and sometimes + dgram/ip sockets are opened for ioctl and not + bind/connect, so we'll fake a bind learn log */ + if (type == SOCK_RAW || type == SOCK_PACKET) { + __u32 fakeip = 0; + security_learn(GR_IP_LEARN_MSG, current->role->rolename, + current->role->roletype, current->uid, + current->gid, current->exec_file ? + gr_to_filename(current->exec_file->f_dentry, + current->exec_file->f_vfsmnt) : + curr->filename, curr->filename, + NIPQUAD(fakeip), 0, type, + protocol, GR_CONNECT, +NIPQUAD(current->signal->curr_ip)); + } else if ((type == SOCK_DGRAM) && (protocol == IPPROTO_IP)) { + __u32 fakeip = 0; + security_learn(GR_IP_LEARN_MSG, current->role->rolename, + current->role->roletype, current->uid, + current->gid, current->exec_file ? + gr_to_filename(current->exec_file->f_dentry, + current->exec_file->f_vfsmnt) : + curr->filename, curr->filename, + NIPQUAD(fakeip), 0, type, + protocol, GR_BIND, NIPQUAD(current->signal->curr_ip)); + } + /* we'll log when they use connect or bind */ + goto exit; + } + + gr_log_str3(GR_DONT_AUDIT, GR_SOCK_MSG, "inet", + gr_socktype_to_name(type), gr_proto_to_name(protocol)); + + return 0; + exit: + return 1; +} + +int check_ip_policy(struct acl_ip_label *ip, __u32 ip_addr, __u16 ip_port, __u8 protocol, const int mode, const int type, __u32 our_addr, __u32 our_netmask) +{ + if ((ip->mode & mode) && + (ip_port >= ip->low) && + (ip_port <= ip->high) && + ((ntohl(ip_addr) & our_netmask) == + (ntohl(our_addr) & our_netmask)) + && (ip->proto[protocol / 32] & (1 << (protocol % 32))) + && (ip->type & (1 << type))) { + if (ip->mode & GR_INVERT) + return 2; // specifically denied + else + return 1; // allowed + } + + return 0; // not specifically allowed, may continue parsing +} + +static int +gr_search_connectbind(const int mode, const struct sock *sk, + const struct sockaddr_in *addr, const int type) +{ + char iface[IFNAMSIZ] = {0}; + struct acl_subject_label *curr; + struct acl_ip_label *ip; + struct net_device *dev; + struct in_device *idev; + unsigned long i; + int ret; + __u32 ip_addr = 0; + __u32 our_addr; + __u32 our_netmask; + char *p; + __u16 ip_port = 0; + + if (unlikely(!gr_acl_is_enabled() || sk->sk_family != PF_INET)) + return 1; + + curr = current->acl; + + if (!curr->ips) + return 1; + + ip_addr = addr->sin_addr.s_addr; + ip_port = ntohs(addr->sin_port); + + if (curr->mode & (GR_LEARN | GR_INHERITLEARN)) { + security_learn(GR_IP_LEARN_MSG, current->role->rolename, + current->role->roletype, current->uid, + current->gid, current->exec_file ? + gr_to_filename(current->exec_file->f_dentry, + current->exec_file->f_vfsmnt) : + curr->filename, curr->filename, + NIPQUAD(ip_addr), ip_port, type, + sk->sk_protocol, mode, NIPQUAD(current->signal->curr_ip)); + return 1; + } + + for (i = 0; i < curr->ip_num; i++) { + ip = *(curr->ips + i); + if (ip->iface != NULL) { + strncpy(iface, ip->iface, IFNAMSIZ - 1); + p = strchr(iface, ':'); + if (p != NULL) + *p = '\0'; + dev = dev_get_by_name(iface); + if (dev == NULL) + continue; + idev = in_dev_get(dev); + if (idev == NULL) { + dev_put(dev); + continue; + } + rcu_read_lock(); + for_ifa(idev) { + if (!strcmp(ip->iface, ifa->ifa_label)) { + our_addr = ifa->ifa_address; + our_netmask = 0xffffffff; + ret = check_ip_policy(ip, ip_addr, ip_port, sk->sk_protocol, mode, type, our_addr, our_netmask); + if (ret == 1) { + rcu_read_unlock(); + in_dev_put(idev); + dev_put(dev); + return 1; + } else if (ret == 2) { + rcu_read_unlock(); + in_dev_put(idev); + dev_put(dev); + goto denied; + } + } + } endfor_ifa(idev); + rcu_read_unlock(); + in_dev_put(idev); + dev_put(dev); + } else { + our_addr = ip->addr; + our_netmask = ip->netmask; + ret = check_ip_policy(ip, ip_addr, ip_port, sk->sk_protocol, mode, type, our_addr, our_netmask); + if (ret == 1) + return 1; + else if (ret == 2) + goto denied; + } + } + +denied: + if (mode == GR_BIND) + gr_log_int5_str2(GR_DONT_AUDIT, GR_BIND_ACL_MSG, NIPQUAD(ip_addr), ip_port, gr_socktype_to_name(type), gr_proto_to_name(sk->sk_protocol)); + else if (mode == GR_CONNECT) + gr_log_int5_str2(GR_DONT_AUDIT, GR_CONNECT_ACL_MSG, NIPQUAD(ip_addr), ip_port, gr_socktype_to_name(type), gr_proto_to_name(sk->sk_protocol)); + + return 0; +} + +int +gr_search_connect(const struct socket *sock, const struct sockaddr_in *addr) +{ + return gr_search_connectbind(GR_CONNECT, sock->sk, addr, sock->type); +} + +int +gr_search_bind(const struct socket *sock, const struct sockaddr_in *addr) +{ + return gr_search_connectbind(GR_BIND, sock->sk, addr, sock->type); +} + +int gr_search_listen(const struct socket *sock) +{ + struct sock *sk = sock->sk; + struct sockaddr_in addr; + + addr.sin_addr.s_addr = inet_sk(sk)->saddr; + addr.sin_port = inet_sk(sk)->sport; + + return gr_search_connectbind(GR_BIND, sock->sk, &addr, sock->type); +} + +int gr_search_accept(const struct socket *sock) +{ + struct sock *sk = sock->sk; + struct sockaddr_in addr; + + addr.sin_addr.s_addr = inet_sk(sk)->saddr; + addr.sin_port = inet_sk(sk)->sport; + + return gr_search_connectbind(GR_BIND, sock->sk, &addr, sock->type); +} + +int +gr_search_udp_sendmsg(const struct sock *sk, const struct sockaddr_in *addr) +{ + if (addr) + return gr_search_connectbind(GR_CONNECT, sk, addr, SOCK_DGRAM); + else { + struct sockaddr_in sin; + const struct inet_sock *inet = inet_sk(sk); + + sin.sin_addr.s_addr = inet->daddr; + sin.sin_port = inet->dport; + + return gr_search_connectbind(GR_CONNECT, sk, &sin, SOCK_DGRAM); + } +} + +int +gr_search_udp_recvmsg(const struct sock *sk, const struct sk_buff *skb) +{ + struct sockaddr_in sin; + + if (unlikely(skb->len < sizeof (struct udphdr))) + return 1; // skip this packet + + sin.sin_addr.s_addr = skb->nh.iph->saddr; + sin.sin_port = skb->h.uh->source; + + return gr_search_connectbind(GR_CONNECT, sk, &sin, SOCK_DGRAM); +} diff -Naur linux-2.6.19.2.orig/grsecurity/gracl_learn.c linux-2.6.19.2/grsecurity/gracl_learn.c --- linux-2.6.19.2.orig/grsecurity/gracl_learn.c 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/gracl_learn.c 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,204 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern ssize_t write_grsec_handler(struct file * file, const char __user * buf, + size_t count, loff_t *ppos); +extern int gr_acl_is_enabled(void); + +static DECLARE_WAIT_QUEUE_HEAD(learn_wait); +static int gr_learn_attached; + +/* use a 512k buffer */ +#define LEARN_BUFFER_SIZE (512 * 1024) + +static spinlock_t gr_learn_lock = SPIN_LOCK_UNLOCKED; +static DECLARE_MUTEX(gr_learn_user_sem); + +/* we need to maintain two buffers, so that the kernel context of grlearn + uses a semaphore around the userspace copying, and the other kernel contexts + use a spinlock when copying into the buffer, since they cannot sleep +*/ +static char *learn_buffer; +static char *learn_buffer_user; +static int learn_buffer_len; +static int learn_buffer_user_len; + +static ssize_t +read_learn(struct file *file, char __user * buf, size_t count, loff_t * ppos) +{ + DECLARE_WAITQUEUE(wait, current); + ssize_t retval = 0; + + add_wait_queue(&learn_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + do { + down(&gr_learn_user_sem); + spin_lock(&gr_learn_lock); + if (learn_buffer_len) + break; + spin_unlock(&gr_learn_lock); + up(&gr_learn_user_sem); + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto out; + } + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto out; + } + + schedule(); + } while (1); + + memcpy(learn_buffer_user, learn_buffer, learn_buffer_len); + learn_buffer_user_len = learn_buffer_len; + retval = learn_buffer_len; + learn_buffer_len = 0; + + spin_unlock(&gr_learn_lock); + + if (copy_to_user(buf, learn_buffer_user, learn_buffer_user_len)) + retval = -EFAULT; + + up(&gr_learn_user_sem); +out: + set_current_state(TASK_RUNNING); + remove_wait_queue(&learn_wait, &wait); + return retval; +} + +static unsigned int +poll_learn(struct file * file, poll_table * wait) +{ + poll_wait(file, &learn_wait, wait); + + if (learn_buffer_len) + return (POLLIN | POLLRDNORM); + + return 0; +} + +void +gr_clear_learn_entries(void) +{ + char *tmp; + + down(&gr_learn_user_sem); + if (learn_buffer != NULL) { + spin_lock(&gr_learn_lock); + tmp = learn_buffer; + learn_buffer = NULL; + spin_unlock(&gr_learn_lock); + vfree(learn_buffer); + } + if (learn_buffer_user != NULL) { + vfree(learn_buffer_user); + learn_buffer_user = NULL; + } + learn_buffer_len = 0; + up(&gr_learn_user_sem); + + return; +} + +void +gr_add_learn_entry(const char *fmt, ...) +{ + va_list args; + unsigned int len; + + if (!gr_learn_attached) + return; + + spin_lock(&gr_learn_lock); + + /* leave a gap at the end so we know when it's "full" but don't have to + compute the exact length of the string we're trying to append + */ + if (learn_buffer_len > LEARN_BUFFER_SIZE - 16384) { + spin_unlock(&gr_learn_lock); + wake_up_interruptible(&learn_wait); + return; + } + if (learn_buffer == NULL) { + spin_unlock(&gr_learn_lock); + return; + } + + va_start(args, fmt); + len = vsnprintf(learn_buffer + learn_buffer_len, LEARN_BUFFER_SIZE - learn_buffer_len, fmt, args); + va_end(args); + + learn_buffer_len += len + 1; + + spin_unlock(&gr_learn_lock); + wake_up_interruptible(&learn_wait); + + return; +} + +static int +open_learn(struct inode *inode, struct file *file) +{ + if (file->f_mode & FMODE_READ && gr_learn_attached) + return -EBUSY; + if (file->f_mode & FMODE_READ) { + down(&gr_learn_user_sem); + if (learn_buffer == NULL) + learn_buffer = vmalloc(LEARN_BUFFER_SIZE); + if (learn_buffer_user == NULL) + learn_buffer_user = vmalloc(LEARN_BUFFER_SIZE); + if (learn_buffer == NULL) + return -ENOMEM; + if (learn_buffer_user == NULL) + return -ENOMEM; + learn_buffer_len = 0; + learn_buffer_user_len = 0; + gr_learn_attached = 1; + up(&gr_learn_user_sem); + } + return 0; +} + +static int +close_learn(struct inode *inode, struct file *file) +{ + char *tmp; + + if (file->f_mode & FMODE_READ) { + down(&gr_learn_user_sem); + if (learn_buffer != NULL) { + spin_lock(&gr_learn_lock); + tmp = learn_buffer; + learn_buffer = NULL; + spin_unlock(&gr_learn_lock); + vfree(tmp); + } + if (learn_buffer_user != NULL) { + vfree(learn_buffer_user); + learn_buffer_user = NULL; + } + learn_buffer_len = 0; + learn_buffer_user_len = 0; + gr_learn_attached = 0; + up(&gr_learn_user_sem); + } + + return 0; +} + +struct file_operations grsec_fops = { + .read = read_learn, + .write = write_grsec_handler, + .open = open_learn, + .release = close_learn, + .poll = poll_learn, +}; diff -Naur linux-2.6.19.2.orig/grsecurity/gracl_res.c linux-2.6.19.2/grsecurity/gracl_res.c --- linux-2.6.19.2.orig/grsecurity/gracl_res.c 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/gracl_res.c 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,45 @@ +#include +#include +#include +#include + +static const char *restab_log[] = { + [RLIMIT_CPU] = "RLIMIT_CPU", + [RLIMIT_FSIZE] = "RLIMIT_FSIZE", + [RLIMIT_DATA] = "RLIMIT_DATA", + [RLIMIT_STACK] = "RLIMIT_STACK", + [RLIMIT_CORE] = "RLIMIT_CORE", + [RLIMIT_RSS] = "RLIMIT_RSS", + [RLIMIT_NPROC] = "RLIMIT_NPROC", + [RLIMIT_NOFILE] = "RLIMIT_NOFILE", + [RLIMIT_MEMLOCK] = "RLIMIT_MEMLOCK", + [RLIMIT_AS] = "RLIMIT_AS", + [RLIMIT_LOCKS] = "RLIMIT_LOCKS", + [RLIMIT_LOCKS + 1] = "RLIMIT_CRASH" +}; + +void +gr_log_resource(const struct task_struct *task, + const int res, const unsigned long wanted, const int gt) +{ + if (res == RLIMIT_NPROC && + (cap_raised(task->cap_effective, CAP_SYS_ADMIN) || + cap_raised(task->cap_effective, CAP_SYS_RESOURCE))) + return; + else if (res == RLIMIT_MEMLOCK && + cap_raised(task->cap_effective, CAP_IPC_LOCK)) + return; + + if (!gr_acl_is_enabled() && !grsec_resource_logging) + return; + + preempt_disable(); + + if (unlikely(((gt && wanted > task->signal->rlim[res].rlim_cur) || + (!gt && wanted >= task->signal->rlim[res].rlim_cur)) && + task->signal->rlim[res].rlim_cur != RLIM_INFINITY)) + gr_log_res_ulong2_str(GR_DONT_AUDIT, GR_RESOURCE_MSG, task, wanted, restab_log[res], task->signal->rlim[res].rlim_cur); + preempt_enable_no_resched(); + + return; +} diff -Naur linux-2.6.19.2.orig/grsecurity/gracl_segv.c linux-2.6.19.2/grsecurity/gracl_segv.c --- linux-2.6.19.2.orig/grsecurity/gracl_segv.c 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19.2/grsecurity/gracl_segv.c 2007-01-12 23:27:23.000000000 +0000 @@ -0,0 +1,295 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct crash_uid *uid_set; +static unsigned short uid_used; +static spinlock_t gr_uid_lock = SPIN_LOCK_UNLOCKED; +extern rwlock_t gr_inode_lock; +extern struct acl_subject_la