diff --git a/debian/patches/bugfix/all/patch-2.6.25-rc7-git2 b/debian/patches/bugfix/all/patch-2.6.25-rc7-git2
new file mode 100644
index 000000000..33b7238da
--- /dev/null
+++ b/debian/patches/bugfix/all/patch-2.6.25-rc7-git2
@@ -0,0 +1,632 @@
+diff --git a/Documentation/i386/IO-APIC.txt b/Documentation/i386/IO-APIC.txt
+index f951666..30b4c71 100644
+--- a/Documentation/i386/IO-APIC.txt
++++ b/Documentation/i386/IO-APIC.txt
+@@ -70,7 +70,7 @@ Every PCI card emits a PCI IRQ, which can be INTA, INTB, INTC or INTD:
+ 
+ These INTA-D PCI IRQs are always 'local to the card', their real meaning
+ depends on which slot they are in. If you look at the daisy chaining diagram,
+-a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ2 of
++a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ4 of
+ the PCI chipset. Most cards issue INTA, this creates optimal distribution
+ between the PIRQ lines. (distributing IRQ sources properly is not a
+ necessity, PCI IRQs can be shared at will, but it's a good for performance
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 73883b8..2f70e5c 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -2322,6 +2322,8 @@ P:	Anil S Keshavamurthy
+ M:	anil.s.keshavamurthy@intel.com
+ P:	David S. Miller
+ M:	davem@davemloft.net
++P:	Masami Hiramatsu
++M:	mhiramat@redhat.com
+ L:	linux-kernel@vger.kernel.org
+ S:	Maintained
+ 
+diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+index f2b5a62..8a85c93 100644
+--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
++++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+@@ -63,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
+  */
+ static int speedstep_smi_ownership (void)
+ {
+-	u32 command, result, magic;
++	u32 command, result, magic, dummy;
+ 	u32 function = GET_SPEEDSTEP_OWNER;
+ 	unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
+ 
+@@ -73,8 +73,11 @@ static int speedstep_smi_ownership (void)
+ 	dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
+ 
+ 	__asm__ __volatile__(
++		"push %%ebp\n"
+ 		"out %%al, (%%dx)\n"
+-		: "=D" (result)
++		"pop %%ebp\n"
++		: "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
++			"=S" (dummy)
+ 		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
+ 			"D" (0), "S" (magic)
+ 		: "memory"
+@@ -96,7 +99,7 @@ static int speedstep_smi_ownership (void)
+  */
+ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
+ {
+-	u32 command, result = 0, edi, high_mhz, low_mhz;
++	u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
+ 	u32 state=0;
+ 	u32 function = GET_SPEEDSTEP_FREQS;
+ 
+@@ -109,10 +112,12 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
+ 
+ 	dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
+ 
+-	__asm__ __volatile__("movl $0, %%edi\n"
++	__asm__ __volatile__(
++		"push %%ebp\n"
+ 		"out %%al, (%%dx)\n"
+-		: "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi)
+-		: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
++		"pop %%ebp"
++		: "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy)
++		: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
+ 	);
+ 
+ 	dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
+@@ -135,16 +140,18 @@
+ static int speedstep_get_state (void)
+ {
+ 	u32 function=GET_SPEEDSTEP_STATE;
+-	u32 result, state, edi, command;
++	u32 result, state, edi, command, dummy;
+ 
+ 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+ 
+ 	dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
+ 
+-	__asm__ __volatile__("movl $0, %%edi\n"
++	__asm__ __volatile__(
++		"push %%ebp\n"
+ 		"out %%al, (%%dx)\n"
+-		: "=a" (result), "=b" (state), "=D" (edi)
+-		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0)
++		"pop %%ebp\n"
++		: "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy)
++		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0)
+ 	);
+ 
+ 	dprintk("state is %x, result is %x\n", state, result);
+@@ -160,7 +167,7 @@ static int speedstep_get_state (void)
+  */
+ static void speedstep_set_state (unsigned int state)
+ {
+-	unsigned int result = 0, command, new_state;
++	unsigned int result = 0, command, new_state, dummy;
+ 	unsigned long flags;
+ 	unsigned int function=SET_SPEEDSTEP_STATE;
+ 	unsigned int retry = 0;
+@@ -182,10 +189,12 @@ static void speedstep_set_state (unsigned int state)
+ 		}
+ 		retry++;
+ 		__asm__ __volatile__(
+-			"movl $0, %%edi\n"
++			"push %%ebp\n"
+ 			"out %%al, (%%dx)\n"
+-			: "=b" (new_state), "=D" (result)
+-			: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
++			"pop %%ebp"
++			: "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy),
++				"=d" (dummy), "=S" (dummy)
++			: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
+ 		);
+ 	} while ((new_state != state) && (retry <= SMI_TRIES));
+ 
+@@ -195,7 +204,7 @@ static void speedstep_set_state (unsigned int state)
+ 	if (new_state == state) {
+ 		dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
+ 	} else {
+-		printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result);
++		printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result);
+ 	}
+ 
+ 	return;
+diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
+index 103d61a..3e18db4 100644
+--- a/arch/x86/kernel/cpu/mtrr/generic.c
++++ b/arch/x86/kernel/cpu/mtrr/generic.c
+@@ -176,12 +176,13 @@ static inline void k8_enable_fixed_iorrs(void)
+ }
+ 
+ /**
+- * Checks and updates an fixed-range MTRR if it differs from the value it
+- * should have. If K8 extentions are wanted, update the K8 SYSCFG MSR also.
+- * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
+- * \param msr MSR address of the MTTR which should be checked and updated
+- * \param changed pointer which indicates whether the MTRR needed to be changed
+- * \param msrwords pointer to the MSR values which the MSR should have
++ * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
++ * @msr: MSR address of the MTTR which should be checked and updated
++ * @changed: pointer which indicates whether the MTRR needed to be changed
++ * @msrwords: pointer to the MSR values which the MSR should have
++ *
++ * If K8 extentions are wanted, update the K8 SYSCFG MSR also.
++ * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
+  */
+ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
+ {
+@@ -199,12 +200,15 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
+ 	}
+ }
+ 
++/**
++ * generic_get_free_region - Get a free MTRR.
++ * @base: The starting (base) address of the region.
++ * @size: The size (in bytes) of the region.
++ * @replace_reg: mtrr index to be replaced; set to invalid value if none.
++ *
++ * Returns: The index of the region on success, else negative on error.
++ */
+ int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
+-/* [SUMMARY] Get a free MTRR.
+-   The starting (base) address of the region.
+-   The size (in bytes) of the region.
+-   [RETURNS] The index of the region on success, else -1 on error.
+-*/
+ {
+ 	int i, max;
+ 	mtrr_type ltype;
+@@ -249,8 +253,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
+ }
+ 
+ /**
+- * Checks and updates the fixed-range MTRRs if they differ from the saved set
+- * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
++ * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set
++ * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
+  */
+ static int set_fixed_ranges(mtrr_type * frs)
+ {
+@@ -294,13 +298,13 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
+ 
+ static u32 deftype_lo, deftype_hi;
+ 
++/**
++ * set_mtrr_state - Set the MTRR state for this CPU.
++ *
++ * NOTE: The CPU must already be in a safe state for MTRR changes.
++ * RETURNS: 0 if no changes made, else a mask indicating what was changed.
++ */
+ static unsigned long set_mtrr_state(void)
+-/* [SUMMARY] Set the MTRR state for this CPU.
+-   The MTRR state information to read.
+-   Some relevant CPU context.
+-   [NOTE] The CPU must already be in a safe state for MTRR changes.
+-   [RETURNS] 0 if no changes made, else a mask indication what was changed.
+-*/
+ {
+ 	unsigned int i;
+ 	unsigned long change_mask = 0;
+diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
+index c706a30..5921e5f 100644
+--- a/arch/x86/kernel/io_delay.c
++++ b/arch/x86/kernel/io_delay.c
+@@ -78,6 +78,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
+ 	},
+ 	{
+ 		.callback = dmi_io_delay_0xed_port,
++		.ident = "HP Pavilion dv6000",
++		.matches = {
++			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
++			DMI_MATCH(DMI_BOARD_NAME, "30B8")
++		}
++	},
++	{
++		.callback = dmi_io_delay_0xed_port,
+ 		.ident = "HP Pavilion tx1000",
+ 		.matches = {
+ 			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
+index 027fc06..b402c0f 100644
+--- a/arch/x86/kernel/mfgpt_32.c
++++ b/arch/x86/kernel/mfgpt_32.c
+@@ -30,6 +30,7 @@
+ 
+ #include
+ #include
++#include
+ #include
+ 
+ static struct mfgpt_timer_t {
+diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
+index a1d7071..2b3e5d4 100644
+--- a/arch/x86/kernel/setup_32.c
++++ b/arch/x86/kernel/setup_32.c
+@@ -406,8 +406,6 @@ static unsigned long __init setup_memory(void)
+ 	 */
+ 	min_low_pfn = PFN_UP(init_pg_tables_end);
+ 
+-	find_max_pfn();
+-
+ 	max_low_pfn = find_max_low_pfn();
+ 
+ #ifdef CONFIG_HIGHMEM
+@@ -764,12 +762,13 @@ void __init setup_arch(char **cmdline_p)
+ 	if (efi_enabled)
+ 		efi_init();
+ 
+-	max_low_pfn = setup_memory();
+-
+ 	/* update e820 for memory not covered by WB MTRRs */
++	find_max_pfn();
+ 	mtrr_bp_init();
+ 	if (mtrr_trim_uncached_memory(max_pfn))
+-		max_low_pfn = setup_memory();
++		find_max_pfn();
++
++	max_low_pfn = setup_memory();
+ 
+ #ifdef CONFIG_VMI
+ 	/*
+diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
+index 7637dc9..f4f7ecf 100644
+--- a/arch/x86/kernel/setup_64.c
++++ b/arch/x86/kernel/setup_64.c
+@@ -801,7 +801,7 @@ static void __cpuinit srat_detect_node(void)
+ 	/* Don't do the funky fallback heuristics the AMD version employs
+ 	   for now. */
+ 	node = apicid_to_node[apicid];
+-	if (node == NUMA_NO_NODE)
++	if (node == NUMA_NO_NODE || !node_online(node))
+ 		node = first_node(node_online_map);
+ 	numa_set_node(cpu, node);
+ 
+diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
+index c394ca0..8e25e06 100644
+--- a/arch/x86/mm/discontig_32.c
++++ b/arch/x86/mm/discontig_32.c
+@@ -324,7 +324,6 @@ unsigned long __init setup_memory(void)
+ 	 * this space and use it to adjust the boundary between ZONE_NORMAL
+ 	 * and ZONE_HIGHMEM.
+ 	 */
+-	find_max_pfn();
+ 	get_memcfg_numa();
+ 
+ 	kva_pages = calculate_numa_remap_pages();
+diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
+index 4afaba0..794895c 100644
+--- a/arch/x86/mm/ioremap.c
++++ b/arch/x86/mm/ioremap.c
+@@ -137,7 +137,11 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
+ 	switch (mode) {
+ 	case IOR_MODE_UNCACHED:
+ 	default:
+-		prot = PAGE_KERNEL_NOCACHE;
++		/*
++		 * FIXME: we will use UC MINUS for now, as video fb drivers
++		 * depend on it. Upcoming ioremap_wc() will fix this behavior.
++		 */
++		prot = PAGE_KERNEL_UC_MINUS;
+ 		break;
+ 	case IOR_MODE_CACHED:
+ 		prot = PAGE_KERNEL;
+diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
+index 14e48b5..7b79f6b 100644
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -771,7 +771,7 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages,
+ int set_memory_uc(unsigned long addr, int numpages)
+ {
+ 	return change_page_attr_set(addr, numpages,
+-			__pgprot(_PAGE_PCD | _PAGE_PWT));
++			__pgprot(_PAGE_PCD));
+ }
+ EXPORT_SYMBOL(set_memory_uc);
+ 
+diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
+index 125e7b7..f7cb8e0 100644
+--- a/drivers/pci/setup-bus.c
++++ b/drivers/pci/setup-bus.c
+@@ -486,12 +486,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
+ 		break;
+ 
+ 	case PCI_CLASS_BRIDGE_PCI:
+-		/* don't size subtractive decoding (transparent)
+-		 * PCI-to-PCI bridges */
+-		if (bus->self->transparent)
+-			break;
+ 		pci_bridge_check_ranges(bus);
+-		/* fall through */
+ 	default:
+ 		pbus_size_io(bus);
+ 		/* If the bridge supports prefetchable range, size it
+diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
+index 174b877..9cf472a 100644
+--- a/include/asm-x86/pgtable.h
++++ b/include/asm-x86/pgtable.h
+@@ -85,6 +85,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
+ #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
+ #define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
+ #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
++#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
+ #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
+ #define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
+ #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
+@@ -101,6 +102,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
+ #define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
+ #define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX)
+ #define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
++#define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS)
+ #define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
+ #define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
+ #define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC)
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index fed07d0..6a1e7af 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1541,6 +1541,12 @@ static inline void idle_task_exit(void) {}
+ 
+ extern void sched_idle_next(void);
+ 
++#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
++extern void wake_up_idle_cpu(int cpu);
++#else
++static inline void wake_up_idle_cpu(int cpu) { }
++#endif
++
+ #ifdef CONFIG_SCHED_DEBUG
+ extern unsigned int sysctl_sched_latency;
+ extern unsigned int sysctl_sched_min_granularity;
+diff --git a/kernel/relay.c b/kernel/relay.c
+index 4c035a8..d6204a4 100644
+--- a/kernel/relay.c
++++ b/kernel/relay.c
+@@ -736,7 +736,7 @@ static int relay_file_open(struct inode *inode, struct file *filp)
+ 	kref_get(&buf->kref);
+ 	filp->private_data = buf;
+ 
+-	return 0;
++	return nonseekable_open(inode, filp);
+ }
+ 
+ /**
+@@ -1056,6 +1056,10 @@ static struct pipe_buf_operations relay_pipe_buf_ops = {
+ 	.get = generic_pipe_buf_get,
+ };
+ 
++static void relay_page_release(struct splice_pipe_desc *spd, unsigned int i)
++{
++}
++
+ /*
+  * subbuf_splice_actor - splice up to one subbuf's worth of data
+  */
+@@ -1083,6 +1087,7 @@ static int subbuf_splice_actor(struct file *in,
+ 		.partial = partial,
+ 		.flags = flags,
+ 		.ops = &relay_pipe_buf_ops,
++		.spd_release = relay_page_release,
+ 	};
+ 
+ 	if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 28c73f0..8dcdec6 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -1052,6 +1052,49 @@ static void resched_cpu(int cpu)
+ 	resched_task(cpu_curr(cpu));
+ 	spin_unlock_irqrestore(&rq->lock, flags);
+ }
++
++#ifdef CONFIG_NO_HZ
++/*
++ * When add_timer_on() enqueues a timer into the timer wheel of an
++ * idle CPU then this timer might expire before the next timer event
++ * which is scheduled to wake up that CPU. In case of a completely
++ * idle system the next event might even be infinite time into the
++ * future. wake_up_idle_cpu() ensures that the CPU is woken up and
++ * leaves the inner idle loop so the newly added timer is taken into
++ * account when the CPU goes back to idle and evaluates the timer
++ * wheel for the next timer event.
++ */
++void wake_up_idle_cpu(int cpu)
++{
++	struct rq *rq = cpu_rq(cpu);
++
++	if (cpu == smp_processor_id())
++		return;
++
++	/*
++	 * This is safe, as this function is called with the timer
++	 * wheel base lock of (cpu) held. When the CPU is on the way
++	 * to idle and has not yet set rq->curr to idle then it will
++	 * be serialized on the timer wheel base lock and take the new
++	 * timer into account automatically.
++	 */
++	if (rq->curr != rq->idle)
++		return;
++
++	/*
++	 * We can set TIF_RESCHED on the idle task of the other CPU
++	 * lockless. The worst case is that the other CPU runs the
++	 * idle task through an additional NOOP schedule()
++	 */
++	set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);
++
++	/* NEED_RESCHED must be visible before we test polling */
++	smp_mb();
++	if (!tsk_is_polling(rq->idle))
++		smp_send_reschedule(cpu);
++}
++#endif
++
+ #else
+ static void __resched_task(struct task_struct *p, int tif_bit)
+ {
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index 278534b..7f60097 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -174,7 +174,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
+ 		if (watchdog)
+ 			del_timer(&watchdog_timer);
+ 		watchdog = cs;
+-		init_timer_deferrable(&watchdog_timer);
++		init_timer(&watchdog_timer);
+ 		watchdog_timer.function = clocksource_watchdog;
+ 
+ 		/* Reset watchdog cycles */
+diff --git a/kernel/timer.c b/kernel/timer.c
+index 99b00a2..b024106 100644
+--- a/kernel/timer.c
++++ b/kernel/timer.c
+@@ -451,10 +451,18 @@ void add_timer_on(struct timer_list *timer, int cpu)
+ 	spin_lock_irqsave(&base->lock, flags);
+ 	timer_set_base(timer, base);
+ 	internal_add_timer(base, timer);
++	/*
++	 * Check whether the other CPU is idle and needs to be
++	 * triggered to reevaluate the timer wheel when nohz is
++	 * active. We are protected against the other CPU fiddling
++	 * with the timer by holding the timer base lock. This also
++	 * makes sure that a CPU on the way to idle can not evaluate
++	 * the timer wheel.
++	 */
++	wake_up_idle_cpu(cpu);
+ 	spin_unlock_irqrestore(&base->lock, flags);
+ }
+ 
+-
+ /**
+  * mod_timer - modify a timer's timeout
+  * @timer: the timer to be modified
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 74c1b6b..51c9e2c 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -401,12 +401,20 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
+ 	struct page *page;
+ 	unsigned long nr_pages;
+ 
++	/*
++	 * We want to release as many surplus pages as possible, spread
++	 * evenly across all nodes. Iterate across all nodes until we
++	 * can no longer free unreserved surplus pages. This occurs when
++	 * the nodes with surplus pages have no free pages.
++	 */
++	unsigned long remaining_iterations = num_online_nodes();
++
+ 	/* Uncommit the reservation */
+ 	resv_huge_pages -= unused_resv_pages;
+ 
+ 	nr_pages = min(unused_resv_pages, surplus_huge_pages);
+ 
+-	while (nr_pages) {
++	while (remaining_iterations-- && nr_pages) {
+ 		nid = next_node(nid, node_online_map);
+ 		if (nid == MAX_NUMNODES)
+ 			nid = first_node(node_online_map);
+@@ -424,6 +432,7 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
+ 			surplus_huge_pages--;
+ 			surplus_huge_pages_node[nid]--;
+ 			nr_pages--;
++			remaining_iterations = num_online_nodes();
+ 		}
+ 	}
+ }
+@@ -671,9 +680,11 @@ int hugetlb_report_node_meminfo(int nid, char *buf)
+ {
+ 	return sprintf(buf,
+ 		"Node %d HugePages_Total: %5u\n"
+-		"Node %d HugePages_Free: %5u\n",
++		"Node %d HugePages_Free: %5u\n"
++		"Node %d HugePages_Surp: %5u\n",
+ 		nid, nr_huge_pages_node[nid],
+-		nid, free_huge_pages_node[nid]);
++		nid, free_huge_pages_node[nid],
++		nid, surplus_huge_pages_node[nid]);
+ }
+ 
+ /* Return the number pages of memory we physically have, in PAGE_SIZE units. */
+diff --git a/mm/slab.c b/mm/slab.c
+index bb4070e..04b308c 100644
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -1481,7 +1481,7 @@ void __init kmem_cache_init(void)
+ 	list_add(&cache_cache.next, &cache_chain);
+ 	cache_cache.colour_off = cache_line_size();
+ 	cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
+-	cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
++	cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
+ 
+ 	/*
+ 	 * struct kmem_cache size depends on nr_node_ids, which
+@@ -1602,7 +1602,7 @@ void __init kmem_cache_init(void)
+ 		int nid;
+ 
+ 		for_each_online_node(nid) {
+-			init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], nid);
++			init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
+ 
+ 			init_list(malloc_sizes[INDEX_AC].cs_cachep,
+ 				  &initkmem_list3[SIZE_AC + nid], nid);
+diff --git a/mm/slub.c b/mm/slub.c
+index ca71d5b..b72bc98 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2685,6 +2685,7 @@ void kfree(const void *x)
+ }
+ EXPORT_SYMBOL(kfree);
+ 
++#if defined(SLUB_DEBUG) || defined(CONFIG_SLABINFO)
+ static unsigned long count_partial(struct kmem_cache_node *n)
+ {
+ 	unsigned long flags;
+@@ -2697,6 +2698,7 @@ static unsigned long count_partial(struct kmem_cache_node *n)
+ 	spin_unlock_irqrestore(&n->list_lock, flags);
+ 	return x;
+ }
++#endif
+ 
+ /*
+  * kmem_cache_shrink removes empty slabs from the partial lists and sorts
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+index 9712716..c22d6b6 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+@@ -322,15 +322,6 @@ next_sge:
+ 		ctxt->direction = DMA_FROM_DEVICE;
+ 		clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
+ 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+-		if ((ch+1)->rc_discrim == 0) {
+-			/*
+-			 * Checked in sq_cq_reap to see if we need to
+-			 * be enqueued
+-			 */
+-			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+-			ctxt->next = hdr_ctxt;
+-			hdr_ctxt->next = head;
+-		}
+ 
+ 		/* Prepare READ WR */
+ 		memset(&read_wr, 0, sizeof read_wr);
+@@ -348,7 +339,17 @@ next_sge:
+ 		rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
+ 				  &sgl_offset,
+ 				  read_wr.num_sge);
+-
++		if (((ch+1)->rc_discrim == 0) &&
++		    (read_wr.num_sge == ch_sge_ary[ch_no].count)) {
++			/*
++			 * Mark the last RDMA_READ with a bit to
++			 * indicate all RPC data has been fetched from
++			 * the client and the RPC needs to be enqueued.
++			 */
++			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
++			ctxt->next = hdr_ctxt;
++			hdr_ctxt->next = head;
++		}
+ 		/* Post the read */
+ 		err = svc_rdma_send(xprt, &read_wr);
+ 		if (err) {
diff --git a/debian/patches/series/1~experimental.1 b/debian/patches/series/1~experimental.1
index 554c3fa8a..de65b6342 100644
--- a/debian/patches/series/1~experimental.1
+++ b/debian/patches/series/1~experimental.1
@@ -1,3 +1,4 @@
++ bugfix/all/patch-2.6.25-rc7-git2
 + debian/version.patch
 + debian/kernelvariables.patch
 + debian/doc-build-parallel.patch