ARM: rework MMU support
In barebox we used 1MiB sections to map our SDRAM cacheable. This has the drawback that we have to map our SDRAM twice: cached for normal SDRAM and uncached for DMA operations. As address space gets sparse on newer systems we are sometimes unable to find a sufficiently large area for the DMA coherent space. This patch changes the MMU code to use second level page tables. With it we can implement dma_alloc_coherent as a normal malloc; we just have to remap the allocated area uncached afterwards and map it cached again after free(). This makes arm_create_section(), setup_dma_coherent() and mmu_enable() no-ops. Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
This commit is contained in:
parent
f9f35ee938
commit
3100ea1466
|
@ -1,10 +1,13 @@
|
|||
#include <common.h>
|
||||
#include <init.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <errno.h>
|
||||
#include <sizes.h>
|
||||
#include <asm/memory.h>
|
||||
|
||||
static unsigned long *ttb;
|
||||
|
||||
void arm_create_section(unsigned long virt, unsigned long phys, int size_m,
|
||||
static void create_section(unsigned long virt, unsigned long phys, int size_m,
|
||||
unsigned int flags)
|
||||
{
|
||||
int i;
|
||||
|
@ -23,6 +26,33 @@ void arm_create_section(unsigned long virt, unsigned long phys, int size_m,
|
|||
);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do it the simple way for now and invalidate the entire
|
||||
* tlb
|
||||
*/
|
||||
static inline void tlb_invalidate(void)
|
||||
{
|
||||
asm volatile (
|
||||
"mov r0, #0\n"
|
||||
"mcr p15, 0, r0, c7, c10, 4; @ drain write buffer\n"
|
||||
"mcr p15, 0, r0, c8, c6, 0; @ invalidate D TLBs\n"
|
||||
"mcr p15, 0, r0, c8, c5, 0; @ invalidate I TLBs\n"
|
||||
:
|
||||
:
|
||||
: "r0"
|
||||
);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CPU_V7
|
||||
#define PTE_FLAGS_CACHED (PTE_EXT_TEX(1) | PTE_BUFFERABLE | PTE_CACHEABLE)
|
||||
#define PTE_FLAGS_UNCACHED (0)
|
||||
#else
|
||||
#define PTE_FLAGS_CACHED (PTE_SMALL_AP_UNO_SRW | PTE_BUFFERABLE | PTE_CACHEABLE)
|
||||
#define PTE_FLAGS_UNCACHED PTE_SMALL_AP_UNO_SRW
|
||||
#endif
|
||||
|
||||
#define PTE_MASK ((1 << 12) - 1)
|
||||
|
||||
/*
|
||||
* Create a second level translation table for the given virtual address.
|
||||
* We initially create a flat uncached mapping on it.
|
||||
|
@ -38,11 +68,89 @@ static u32 *arm_create_pte(unsigned long virt)
|
|||
ttb[virt] = (unsigned long)table | PMD_TYPE_TABLE;
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
table[i] = virt | PTE_TYPE_SMALL | PTE_SMALL_AP_UNO_SRW;
|
||||
table[i] = virt | PTE_TYPE_SMALL | PTE_FLAGS_UNCACHED;
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
static void remap_range(void *_start, size_t size, uint32_t flags)
|
||||
{
|
||||
u32 pteentry;
|
||||
struct arm_memory *mem;
|
||||
unsigned long start = (unsigned long)_start;
|
||||
u32 *p;
|
||||
int numentries, i;
|
||||
|
||||
for_each_sdram_bank(mem) {
|
||||
if (start >= mem->start && start < mem->start + mem->size)
|
||||
goto found;
|
||||
}
|
||||
|
||||
BUG();
|
||||
return;
|
||||
|
||||
found:
|
||||
pteentry = (start - mem->start) >> PAGE_SHIFT;
|
||||
|
||||
numentries = size >> PAGE_SHIFT;
|
||||
|
||||
p = mem->ptes + pteentry;
|
||||
|
||||
for (i = 0; i < numentries; i++) {
|
||||
p[i] &= ~PTE_MASK;
|
||||
p[i] |= flags | PTE_TYPE_SMALL;
|
||||
}
|
||||
|
||||
dma_flush_range((unsigned long)p,
|
||||
(unsigned long)p + numentries * sizeof(u32));
|
||||
|
||||
tlb_invalidate();
|
||||
}
|
||||
|
||||
/*
|
||||
* remap the memory bank described by mem cachable and
|
||||
* bufferable
|
||||
*/
|
||||
static int arm_mmu_remap_sdram(struct arm_memory *mem)
|
||||
{
|
||||
unsigned long phys = (unsigned long)mem->start;
|
||||
unsigned long ttb_start = phys >> 20;
|
||||
unsigned long ttb_end = (phys + mem->size) >> 20;
|
||||
unsigned long num_ptes = mem->size >> 10;
|
||||
int i, pte;
|
||||
|
||||
debug("remapping SDRAM from 0x%08lx (size 0x%08lx)\n",
|
||||
phys, mem->size);
|
||||
|
||||
/*
|
||||
* We replace each 1MiB section in this range with second level page
|
||||
* tables, therefore we must have 1Mib aligment here.
|
||||
*/
|
||||
if ((phys & (SZ_1M - 1)) || (mem->size & (SZ_1M - 1)))
|
||||
return -EINVAL;
|
||||
|
||||
mem->ptes = memalign(0x400, num_ptes * sizeof(u32));
|
||||
|
||||
debug("ptes: 0x%p ttb_start: 0x%08lx ttb_end: 0x%08lx\n",
|
||||
mem->ptes, ttb_start, ttb_end);
|
||||
|
||||
for (i = 0; i < num_ptes; i++) {
|
||||
mem->ptes[i] = (phys + i * 4096) | PTE_TYPE_SMALL |
|
||||
PTE_FLAGS_CACHED;
|
||||
}
|
||||
|
||||
pte = 0;
|
||||
|
||||
for (i = ttb_start; i < ttb_end; i++) {
|
||||
ttb[i] = (unsigned long)(&mem->ptes[pte]) | PMD_TYPE_TABLE |
|
||||
(0 << 4);
|
||||
pte += 256;
|
||||
}
|
||||
|
||||
tlb_invalidate();
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* We have 8 exception vectors and the table consists of absolute
|
||||
* jumps, so we need 8 * 4 bytes for the instructions and another
|
||||
|
@ -66,19 +174,21 @@ static void vectors_init(void)
|
|||
memset(vectors, 0, PAGE_SIZE);
|
||||
memcpy(vectors, &exception_vectors, ARM_VECTORS_SIZE);
|
||||
|
||||
exc[0] = (u32)vectors | PTE_TYPE_SMALL | PTE_SMALL_AP_UNO_SRW;
|
||||
exc[0] = (u32)vectors | PTE_TYPE_SMALL | PTE_FLAGS_CACHED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare MMU for usage and create a flat mapping. Board
|
||||
* code is responsible to remap the SDRAM cached
|
||||
* Prepare MMU for usage and enable it.
|
||||
*/
|
||||
void mmu_init(void)
|
||||
int mmu_init(void)
|
||||
{
|
||||
struct arm_memory *mem;
|
||||
int i;
|
||||
|
||||
ttb = memalign(0x10000, 0x4000);
|
||||
|
||||
debug("ttb: 0x%p\n", ttb);
|
||||
|
||||
/* Set the ttb register */
|
||||
asm volatile ("mcr p15,0,%0,c2,c0,0" : : "r"(ttb) /*:*/);
|
||||
|
||||
|
@ -86,23 +196,36 @@ void mmu_init(void)
|
|||
i = 0x3;
|
||||
asm volatile ("mcr p15,0,%0,c3,c0,0" : : "r"(i) /*:*/);
|
||||
|
||||
/* create a flat mapping */
|
||||
arm_create_section(0, 0, 4096, PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT);
|
||||
/* create a flat mapping using 1MiB sections */
|
||||
create_section(0, 0, 4096, PMD_SECT_AP_WRITE | PMD_SECT_AP_READ |
|
||||
PMD_TYPE_SECT);
|
||||
|
||||
vectors_init();
|
||||
}
|
||||
|
||||
/*
|
||||
* enable the MMU. Should be called after mmu_init()
|
||||
*/
|
||||
void mmu_enable(void)
|
||||
{
|
||||
/*
|
||||
* First remap sdram cached using sections.
|
||||
* This is to speed up the generation of 2nd level page tables
|
||||
* below
|
||||
*/
|
||||
for_each_sdram_bank(mem)
|
||||
create_section(mem->start, mem->start, mem->size >> 20,
|
||||
PMD_SECT_DEF_CACHED);
|
||||
|
||||
asm volatile (
|
||||
"bl __mmu_cache_on;"
|
||||
:
|
||||
:
|
||||
: "r0", "r1", "r2", "r3", "r6", "r10", "r12", "cc", "memory"
|
||||
);
|
||||
|
||||
/*
|
||||
* Now that we have the MMU and caches on remap sdram again using
|
||||
* page tables
|
||||
*/
|
||||
for_each_sdram_bank(mem)
|
||||
arm_mmu_remap_sdram(mem);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct outer_cache_fns outer_cache;
|
||||
|
@ -125,39 +248,41 @@ void mmu_disable(void)
|
|||
);
|
||||
}
|
||||
|
||||
/*
|
||||
* For boards which need coherent memory for DMA. The idea
|
||||
* is simple: Setup a uncached section containing your SDRAM
|
||||
* and call setup_dma_coherent() with the offset between the
|
||||
* cached and the uncached section. dma_alloc_coherent() then
|
||||
* works using normal malloc but returns the corresponding
|
||||
* pointer in the uncached area.
|
||||
*/
|
||||
static unsigned long dma_coherent_offset;
|
||||
|
||||
void setup_dma_coherent(unsigned long offset)
|
||||
{
|
||||
dma_coherent_offset = offset;
|
||||
}
|
||||
/*
 * Round s up to the next multiple of PAGE_SIZE (which must be a power of
 * two). The expansion is fully parenthesized and carries no trailing
 * semicolon, so the macro can be used inside larger expressions.
 */
#define PAGE_ALIGN(s) (((s) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
|
||||
|
||||
void *dma_alloc_coherent(size_t size)
|
||||
{
|
||||
return xmemalign(4096, size) + dma_coherent_offset;
|
||||
void *ret;
|
||||
|
||||
size = PAGE_ALIGN(size);
|
||||
ret = xmemalign(4096, size);
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
dma_inv_range((unsigned long)ret, (unsigned long)ret + size);
|
||||
|
||||
remap_range(ret, size, PTE_FLAGS_UNCACHED);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned long virt_to_phys(void *virt)
|
||||
{
|
||||
return (unsigned long)virt - dma_coherent_offset;
|
||||
return (unsigned long)virt;
|
||||
}
|
||||
|
||||
void *phys_to_virt(unsigned long phys)
|
||||
{
|
||||
return (void *)(phys + dma_coherent_offset);
|
||||
return (void *)phys;
|
||||
}
|
||||
|
||||
void dma_free_coherent(void *mem, size_t size)
|
||||
{
|
||||
free(mem - dma_coherent_offset);
|
||||
#ifdef CONFIG_MMU
|
||||
remap_range(mem, size, PTE_FLAGS_CACHED);
|
||||
#endif
|
||||
|
||||
free(mem);
|
||||
}
|
||||
|
||||
void dma_clean_range(unsigned long start, unsigned long end)
|
||||
|
|
|
@ -3,19 +3,29 @@
|
|||
|
||||
#include <asm/pgtable.h>
|
||||
#include <malloc.h>
|
||||
#include <errno.h>
|
||||
|
||||
#define PMD_SECT_DEF_UNCACHED (PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT)
|
||||
#define PMD_SECT_DEF_CACHED (PMD_SECT_WB | PMD_SECT_DEF_UNCACHED)
|
||||
|
||||
void mmu_init(void);
|
||||
void mmu_enable(void);
|
||||
void mmu_disable(void);
|
||||
void arm_create_section(unsigned long virt, unsigned long phys, int size_m,
|
||||
unsigned int flags);
|
||||
struct arm_memory;
|
||||
|
||||
void setup_dma_coherent(unsigned long offset);
|
||||
static inline void mmu_enable(void)
|
||||
{
|
||||
}
|
||||
void mmu_disable(void);
|
||||
static inline void arm_create_section(unsigned long virt, unsigned long phys, int size_m,
|
||||
unsigned int flags)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void setup_dma_coherent(unsigned long offset)
|
||||
{
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
int mmu_init(void);
|
||||
|
||||
void *dma_alloc_coherent(size_t size);
|
||||
void dma_free_coherent(void *mem, size_t size);
|
||||
|
||||
|
|
|
@ -221,6 +221,7 @@ int run_shell(void);
|
|||
#define ULLONG_MAX (~0ULL)
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
#define PAGE_SHIFT 12
|
||||
|
||||
int memory_display(char *addr, ulong offs, ulong nbytes, int size);
|
||||
|
||||
|
|
Loading…
Reference in New Issue