[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: hugepage patches



1/4

Using a futex in a large page causes a kernel lockup in __pin_page() -
because __pin_page's page revalidation uses follow_page(), and follow_page()
doesn't work for hugepages.

The patch fixes up follow_page() to return the appropriate 4k page for
hugepages.

This incurs a vma lookup for each follow_page(), which is considerable
overhead in some situations.  We only _need_ to do this if the architecture
cannot determin a page's hugeness from the contents of the PMD.

So this patch is a "reference" implementation for, say, PPC BAT-based
hugepages.




 arch/i386/mm/hugetlbpage.c |   29 +++++++++++++++++++++++++++++
 include/linux/hugetlb.h    |   18 ++++++++++++++++--
 include/linux/sched.h      |    4 +++-
 mm/memory.c                |    5 +++++
 mm/mmap.c                  |    2 +-
 linux/mm.h                 |    0 
 6 files changed, 54 insertions(+), 4 deletions(-)

diff -puN mm/memory.c~pin_page-fix mm/memory.c
--- 25/mm/memory.c~pin_page-fix	Fri Jan 31 13:32:13 2003
+++ 25-akpm/mm/memory.c	Fri Jan 31 14:29:59 2003
@@ -607,6 +607,11 @@ follow_page(struct mm_struct *mm, unsign
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 	unsigned long pfn;
+	struct vm_area_struct *vma;
+
+	vma = hugepage_vma(mm, address);
+	if (vma)
+		return follow_huge_addr(mm, vma, address, write);
 
 	pgd = pgd_offset(mm, address);
 	if (pgd_none(*pgd) || pgd_bad(*pgd))
diff -puN include/linux/hugetlb.h~pin_page-fix include/linux/hugetlb.h
--- 25/include/linux/hugetlb.h~pin_page-fix	Fri Jan 31 13:32:13 2003
+++ 25-akpm/include/linux/hugetlb.h	Fri Jan 31 14:29:59 2003
@@ -20,16 +20,28 @@ int hugetlb_prefault(struct address_spac
 void huge_page_release(struct page *);
 int hugetlb_report_meminfo(char *);
 int is_hugepage_mem_enough(size_t);
-
+struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, int write);
+struct vm_area_struct *hugepage_vma(struct mm_struct *mm,
+					unsigned long address);
 extern int htlbpage_max;
 
+static inline void
+mark_mm_hugetlb(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	if (is_vm_hugetlb_page(vma))
+		mm->used_hugetlb = 1;
+}
+
 #else /* !CONFIG_HUGETLB_PAGE */
+
 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 {
 	return 0;
 }
 
-#define follow_hugetlb_page(m,v,p,vs,a,b,i)		({ BUG(); 0; })
+#define follow_hugetlb_page(m,v,p,vs,a,b,i)	({ BUG(); 0; })
+#define follow_huge_addr(mm, vma, addr, write)	0
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)		({ BUG(); 0; })
 #define zap_hugepage_range(vma, start, len)	BUG()
@@ -37,6 +49,8 @@ static inline int is_vm_hugetlb_page(str
 #define huge_page_release(page)			BUG()
 #define is_hugepage_mem_enough(size)		0
 #define hugetlb_report_meminfo(buf)		0
+#define hugepage_vma(mm, addr)			0
+#define mark_mm_hugetlb(mm, vma)		do { } while (0)
 
 #endif /* !CONFIG_HUGETLB_PAGE */
 
diff -puN arch/i386/mm/hugetlbpage.c~pin_page-fix arch/i386/mm/hugetlbpage.c
--- 25/arch/i386/mm/hugetlbpage.c~pin_page-fix	Fri Jan 31 13:32:13 2003
+++ 25-akpm/arch/i386/mm/hugetlbpage.c	Fri Jan 31 14:29:59 2003
@@ -150,6 +150,35 @@ back1:
 	return i;
 }
 
+struct page *
+follow_huge_addr(struct mm_struct *mm,
+	struct vm_area_struct *vma, unsigned long address, int write)
+{
+	unsigned long start = address;
+	int length = 1;
+	int nr;
+	struct page *page;
+
+	nr = follow_hugetlb_page(mm, vma, &page, NULL, &start, &length, 0);
+	if (nr == 1)
+		return page;
+	return NULL;
+}
+
+/*
+ * If virtual address `addr' lies within a huge page, return its controlling
+ * VMA, else NULL.
+ */
+struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr)
+{
+	if (mm->used_hugetlb) {
+		struct vm_area_struct *vma = find_vma(mm, addr);
+		if (vma && is_vm_hugetlb_page(vma))
+			return vma;
+	}
+	return NULL;
+}
+
 void free_huge_page(struct page *page)
 {
 	BUG_ON(page_count(page));
diff -puN mm/mmap.c~pin_page-fix mm/mmap.c
--- 25/mm/mmap.c~pin_page-fix	Fri Jan 31 13:32:13 2003
+++ 25-akpm/mm/mmap.c	Fri Jan 31 13:32:13 2003
@@ -362,6 +362,7 @@ static void vma_link(struct mm_struct *m
 	if (mapping)
 		up(&mapping->i_shared_sem);
 
+	mark_mm_hugetlb(mm, vma);
 	mm->map_count++;
 	validate_mm(mm);
 }
@@ -1427,7 +1428,6 @@ void exit_mmap(struct mm_struct *mm)
 		kmem_cache_free(vm_area_cachep, vma);
 		vma = next;
 	}
-		
 }
 
 /* Insert vm structure into process list sorted by address
diff -puN include/linux/mm.h~pin_page-fix include/linux/mm.h
diff -puN include/linux/sched.h~pin_page-fix include/linux/sched.h
--- 25/include/linux/sched.h~pin_page-fix	Fri Jan 31 13:32:13 2003
+++ 25-akpm/include/linux/sched.h	Fri Jan 31 13:32:13 2003
@@ -203,7 +203,9 @@ struct mm_struct {
 	unsigned long swap_address;
 
 	unsigned dumpable:1;
-
+#ifdef CONFIG_HUGETLB_PAGE
+	int used_hugetlb;
+#endif
 	/* Architecture-specific MM context */
 	mm_context_t context;
 

_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/