[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patch] pae-2.4.3-A4




On Mon, 19 Mar 2001, Linus Torvalds wrote:

> The complete changelog is appended, but the biggest recent change is
> the mmap_sem change, which I updated with new locking rules for
> pte/pmd_alloc to avoid the race on the actual page table build.
>
> This has only been tested on i386 without PAE, and is known to break
> other architectures. Ingo, mind checking what PAE needs? [...]

one nontrivial issue was that on PAE the pgd has to be installed with
'present' pgd entries, due to a CPU erratum. This means that the
pgd_present() code in mm/memory.c, while correct theoretically, doesnt
work with PAE. An equivalent solution is to use !pgd_none(), which also
works with the PAE workaround.

PAE mode could re-define pgd_present() to filter out the workaround - do
you prefer this to the !pgd_none() solution?

the rest was pretty straightforward.

in any case, with the attached pae-2.4.3-A4 patch (against 2.4.3-pre7,
applies to 2.4.2-ac24 cleanly as well) applied, 2.4.3-pre7 boots & works
just fine on PAE 64GB-HIGHMEM and non-PAE kernels.

- the patch also does another cleanup: removes various bad_pagetable code
  snippets all around the x86 tree, it's not needed anymore. This saves 8
  KB RAM on x86 systems.

- removed the last remaining *_kernel() macro.

- fixed a minor clear_page() bug in pgalloc.h, gfp() could fail in the
  future.

	Ingo
--- linux/mm/memory.c.orig	Sun Mar 25 18:55:05 2001
+++ linux/mm/memory.c	Sun Mar 25 18:55:07 2001
@@ -1295,7 +1295,7 @@
 		 * Because we dropped the lock, we should re-check the
 		 * entry, as somebody else could have populated it..
 		 */
-		if (pgd_present(*pgd)) {
+		if (!pgd_none(*pgd)) {
 			pmd_free(new);
 			goto out;
 		}
@@ -1313,7 +1313,7 @@
  */
 pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 {
-	if (!pmd_present(*pmd)) {
+	if (pmd_none(*pmd)) {
 		pte_t *new;
 
 		/* "fast" allocation can happen without dropping the lock.. */
@@ -1329,7 +1329,7 @@
 			 * Because we dropped the lock, we should re-check the
 			 * entry, as somebody else could have populated it..
 			 */
-			if (pmd_present(*pmd)) {
+			if (!pmd_none(*pmd)) {
 				pte_free(new);
 				goto out;
 			}
--- linux/include/asm-i386/pgalloc-3level.h.orig	Sun Mar 25 18:55:05 2001
+++ linux/include/asm-i386/pgalloc-3level.h	Sun Mar 25 19:23:02 2001
@@ -21,12 +21,12 @@
 {
 	unsigned long *ret;
 
-	if ((ret = pmd_quicklist) != NULL) {
+	ret = pmd_quicklist;
+	if (ret != NULL) {
 		pmd_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
 		pgtable_cache_size--;
-	} else
-		ret = (unsigned long *)get_pmd_slow();
+	}
 	return (pmd_t *)ret;
 }
 
@@ -41,5 +41,10 @@
 {
 	free_page((unsigned long)pmd);
 }
+
+#define pmd_free(pmd) pmd_free_fast(pmd)
+
+#define pgd_populate(pgd, pmd) \
+	do { set_pgd(pgd, __pgd(1 + __pa(pmd))); __flush_tlb(); } while(0)
 
 #endif /* _I386_PGALLOC_3LEVEL_H */
--- linux/include/asm-i386/pgalloc.h.orig	Sun Mar 25 18:56:25 2001
+++ linux/include/asm-i386/pgalloc.h	Sun Mar 25 19:35:59 2001
@@ -11,7 +11,8 @@
 #define pte_quicklist (current_cpu_data.pte_quick)
 #define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
 
-#define pmd_populate(pmd, pte)		set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
+#define pmd_populate(pmd, pte) \
+		set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
 
 #if CONFIG_X86_PAE
 # include <asm/pgalloc-3level.h>
@@ -72,7 +73,8 @@
 	pte_t *pte;
 
 	pte = (pte_t *) __get_free_page(GFP_KERNEL);
-	clear_page(pte);
+	if (pte)
+		clear_page(pte);
 	return pte;
 }
 
@@ -100,7 +102,6 @@
 	free_page((unsigned long)pte);
 }
 
-#define pte_free_kernel(pte)	pte_free_slow(pte)
 #define pte_free(pte)		pte_free_slow(pte)
 #define pgd_free(pgd)		free_pgd_slow(pgd)
 #define pgd_alloc()		get_pgd_fast()
--- linux/include/asm-i386/pgtable.h.orig	Sun Mar 25 19:03:58 2001
+++ linux/include/asm-i386/pgtable.h	Sun Mar 25 19:04:06 2001
@@ -243,12 +243,6 @@
 /* page table for 0-4MB for everybody */
 extern unsigned long pg0[1024];
 
-/*
- * Handling allocation failures during page table setup.
- */
-extern void __handle_bad_pmd(pmd_t * pmd);
-extern void __handle_bad_pmd_kernel(pmd_t * pmd);
-
 #define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
 #define pte_clear(xp)	do { set_pte(xp, __pte(0)); } while (0)
 
--- linux/arch/i386/mm/init.c.orig	Sun Mar 25 19:02:05 2001
+++ linux/arch/i386/mm/init.c	Sun Mar 25 19:02:42 2001
@@ -40,77 +40,6 @@
 static unsigned long totalram_pages;
 static unsigned long totalhigh_pages;
 
-/*
- * BAD_PAGE is the page that is used for page faults when linux
- * is out-of-memory. Older versions of linux just did a
- * do_exit(), but using this instead means there is less risk
- * for a process dying in kernel mode, possibly leaving an inode
- * unused etc..
- *
- * BAD_PAGETABLE is the accompanying page-table: it is initialized
- * to point to BAD_PAGE entries.
- *
- * ZERO_PAGE is a special page that is used for zero-initialized
- * data and COW.
- */
-
-/*
- * These are allocated in head.S so that we get proper page alignment.
- * If you change the size of these then change head.S as well.
- */
-extern char empty_bad_page[PAGE_SIZE];
-#if CONFIG_X86_PAE
-extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD];
-#endif
-extern pte_t empty_bad_pte_table[PTRS_PER_PTE];
-
-/*
- * We init them before every return and make them writable-shared.
- * This guarantees we get out of the kernel in some more or less sane
- * way.
- */
-#if CONFIG_X86_PAE
-static pmd_t * get_bad_pmd_table(void)
-{
-	pmd_t v;
-	int i;
-
-	set_pmd(&v, __pmd(_PAGE_TABLE + __pa(empty_bad_pte_table)));
-
-	for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++)
-		empty_bad_pmd_table[i] = v;
-
-	return empty_bad_pmd_table;
-}
-#endif
-
-static pte_t * get_bad_pte_table(void)
-{
-	pte_t v;
-	int i;
-
-	v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED));
-
-	for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++)
-		empty_bad_pte_table[i] = v;
-
-	return empty_bad_pte_table;
-}
-
-
-
-void __handle_bad_pmd(pmd_t *pmd)
-{
-	pmd_ERROR(*pmd);
-	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table())));
-}
-
-void __handle_bad_pmd_kernel(pmd_t *pmd)
-{
-	pmd_ERROR(*pmd);
-	set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table())));
-}
-
 int do_check_pgt_cache(int low, int high)
 {
 	int freed = 0;
--- linux/arch/i386/kernel/head.S.orig	Sun Mar 25 19:04:23 2001
+++ linux/arch/i386/kernel/head.S	Sun Mar 25 19:04:57 2001
@@ -415,23 +415,6 @@
 ENTRY(empty_zero_page)
 
 .org 0x5000
-ENTRY(empty_bad_page)
-
-.org 0x6000
-ENTRY(empty_bad_pte_table)
-
-#if CONFIG_X86_PAE
-
- .org 0x7000
- ENTRY(empty_bad_pmd_table)
-
- .org 0x8000
-
-#else
-
- .org 0x7000
-
-#endif
 
 /*
  * This starts the data section. Note that the above is all
--- linux/MAINTAINERS.orig	Sun Mar 25 19:23:13 2001
+++ linux/MAINTAINERS	Sun Mar 25 19:24:46 2001
@@ -1454,6 +1454,11 @@
 L:	linux-x25@vger.kernel.org
 S:	Maintained
 
+X86 3-LEVEL PAGING (PAE) SUPPORT
+P:	Ingo Molnar
+M:	mingo@redhat.com
+S:	Maintained
+
 Z85230 SYNCHRONOUS DRIVER
 P:	Alan Cox
 M:	alan@redhat.com