453 lines
13 KiB
C++
453 lines
13 KiB
C++
#include <cstdint>
|
|
#include <cstring>
|
|
#include <cstdio>
|
|
#include <cassert>
|
|
|
|
#include "phys_mm.hh"
|
|
#include "cortexa8.hh"
|
|
|
|
extern int _vect_table;
|
|
|
|
uint32_t cortexa8::read_cpuid(int reg) noexcept {
|
|
uint32_t rval;
|
|
switch(reg) {
|
|
case CORTEXA8_CPUID_MAINID:
|
|
__asm__ __volatile__ ("mrc 15, 0, %[res], c0, c0, 0"
|
|
: [res] "=r"(rval));
|
|
break;
|
|
case CORTEXA8_CPUID_CACHETYPE:
|
|
__asm__ __volatile__ ("mrc 15, 0, %[res], c0, c0, 1"
|
|
: [res] "=r"(rval));
|
|
break;
|
|
case CORTEXA8_CPUID_TLBTYPE:
|
|
__asm__ __volatile__ ("mrc 15, 0, %[res], c0, c0, 3"
|
|
: [res] "=r"(rval));
|
|
break;
|
|
case CORTEXA8_CPUID_PFR0:
|
|
__asm__ __volatile__ ("mrc 15, 0, %[res], c0, c1, 0"
|
|
: [res] "=r"(rval));
|
|
break;
|
|
case CORTEXA8_CPUID_PFR1:
|
|
__asm__ __volatile__ ("mrc 15, 0, %[res], c0, c1, 1"
|
|
: [res] "=r"(rval));
|
|
break;
|
|
default:
|
|
rval = 0xdeadbeef;
|
|
break;
|
|
}
|
|
return rval;
|
|
}
|
|
|
|
// Turn on the instruction cache: read-modify-write of the CP15 c1
// control register setting bit 12 (0x1000), followed by an ISB.
void cortexa8::enable_icache() noexcept {
  uint32_t sctlr;  // scratch register for the read-modify-write

  __asm__ __volatile__ (
    "mrc 15, 0, %[reg], c1, c0, 0; orr %[reg], %[reg], #0x1000; mcr 15, 0, %[reg], c1, c0, 0; isb"
    : [reg] "=r"(sctlr));
}
|
|
|
|
// Turn on the data cache: read-modify-write of the CP15 c1 control
// register setting bit 2 (0x4).
//
// The write is followed by an ISB (as enable_icache() does) so the new
// setting is in effect before any later instruction executes, and the
// asm carries a "memory" clobber so the compiler does not move data
// accesses across the switch.
void cortexa8::enable_dcache() noexcept {
  uint32_t reg;

  __asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; orr %[reg], %[reg], #0x4; mcr 15, 0, %[reg], c1, c0, 0; isb"
                        : [reg] "=r"(reg)
                        :
                        : "memory");
}
|
|
|
|
// Turn off the instruction cache: read-modify-write of the CP15 c1
// control register clearing bit 12 (0x1000), followed by an ISB.
void cortexa8::disable_icache() noexcept {
  uint32_t sctlr;  // scratch register for the read-modify-write

  __asm__ __volatile__ (
    "mrc 15, 0, %[reg], c1, c0, 0; bic %[reg], %[reg], #0x1000; mcr 15, 0, %[reg], c1, c0, 0; isb"
    : [reg] "=r"(sctlr));
}
|
|
|
|
// Turn off the data cache: read-modify-write of the CP15 c1 control
// register clearing bit 2 (0x4).
//
// The write is followed by an ISB (as disable_icache() does) so the new
// setting is in effect before any later instruction executes, and the
// asm carries a "memory" clobber so the compiler does not move data
// accesses across the switch.
//
// NOTE(review): no cache clean is performed here; presumably the caller
// is responsible for writing back dirty lines first - confirm.
void cortexa8::disable_dcache() noexcept {
  uint32_t reg;

  __asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; bic %[reg], %[reg], #0x4; mcr 15, 0, %[reg], c1, c0, 0; isb"
                        : [reg] "=r"(reg)
                        :
                        : "memory");
}
|
|
|
|
// First-level translation table entry describing a 1 MiB section.
// Bit positions below assume the compiler allocates bit-fields starting
// at the least significant bit; the fields add up to one 32-bit word.
struct ptentry_section_t {
  unsigned         : 1;   // bit 0      (unused here)
  unsigned one     : 1;   // bit 1      set to mark the entry as valid
  unsigned b       : 1;   // bit 2      memory type: B
  unsigned c       : 1;   // bit 3      memory type: C
  unsigned xn      : 1;   // bit 4      execute never
  unsigned domain  : 4;   // bits 8:5   domain number
  unsigned         : 1;   // bit 9      (unused here)
  unsigned ap      : 2;   // bits 11:10 access permissions, low part
  unsigned tex     : 3;   // bits 14:12 memory type: TEX
  unsigned ap2     : 1;   // bit 15     access permissions, high bit
  unsigned s       : 1;   // bit 16     shareable
  unsigned ng      : 1;   // bit 17     not global
  unsigned         : 1;   // bit 18     (unused here)
  unsigned ns      : 1;   // bit 19     non-secure
  unsigned baseadr : 12;  // bits 31:20 section base address >> 20
};
|
|
|
|
// First-level translation table entry that points at a second-level
// page table. Bit positions assume LSB-first bit-field allocation; the
// fields add up to one 32-bit word.
struct ptentry_ptl2_t {
  unsigned one    : 1;   // bit 0      set to mark the entry as valid
  unsigned        : 2;   // bits 2:1   (unused here)
  unsigned ns     : 1;   // bit 3      non-secure
  unsigned        : 1;   // bit 4      (unused here)
  unsigned domain : 4;   // bits 8:5   domain number
  unsigned        : 1;   // bit 9      (unused here)
  unsigned ptadr  : 22;  // bits 31:10 L2 table address >> 10
};
|
|
|
|
// Second-level translation table entry describing a 4 KiB page.
// Bit positions assume LSB-first bit-field allocation; the fields add
// up to one 32-bit word.
struct ptl2entry_page_t {
  unsigned xn      : 1;   // bit 0      execute never
  unsigned one     : 1;   // bit 1      set to mark the entry as valid
  unsigned b       : 1;   // bit 2      memory type: B
  unsigned c       : 1;   // bit 3      memory type: C
  unsigned ap      : 2;   // bits 5:4   access permissions, low part
  unsigned tex     : 3;   // bits 8:6   memory type: TEX
  unsigned ap2     : 1;   // bit 9      access permissions, high bit
  unsigned s       : 1;   // bit 10     shareable
  unsigned ng      : 1;   // bit 11     not global
  unsigned baseadr : 20;  // bits 31:12 page base address >> 12
};
|
|
|
|
// Import symbols from linker
|
|
extern uint32_t _kernel_ptl1;
|
|
extern uint32_t _kernel_ptl2;
|
|
extern uint32_t _kernel_real_numl2pt;
|
|
extern uint32_t _kernel_numl2ent;
|
|
extern uint32_t __io_start;
|
|
|
|
// Fix some variables
|
|
static const uintptr_t io_start = (uintptr_t)&__io_start;
|
|
static const unsigned numl2pt = (unsigned)&_kernel_real_numl2pt;
|
|
static const unsigned numl2ent = (unsigned)&_kernel_numl2ent;
|
|
|
|
static volatile uint32_t *const ttable1 = &_kernel_ptl1;
|
|
static volatile uint32_t *const kern_ptl2 = &_kernel_ptl2;
|
|
static uint32_t volatile* ttable1_virt[4096];
|
|
|
|
// Map console UART I/O region statically, for debug output while
|
|
// initializing memory management
|
|
static volatile ptl2entry_page_t ttable2_earlyio[256] __attribute__((aligned(1024)));
|
|
|
|
// Map first section of pagetable map area
|
|
static volatile ptl2entry_page_t ttable2_ptmap[256] __attribute__((aligned(1024)));
|
|
|
|
bool _mmu_is_init = false;
|
|
static unsigned ptmap_area_top = 256, ptmap_area_free = 0;
|
|
|
|
static void _init_ttable() noexcept {
|
|
memset((void*)ttable1, 0, 16384);
|
|
|
|
memset((void*)kern_ptl2, 0, 1024*numl2pt);
|
|
memset((void*)ttable2_earlyio, 0, 1024);
|
|
memset((void*)ttable2_ptmap, 0, 1024);
|
|
|
|
for(unsigned i = 0;i < numl2pt;++i) {
|
|
ttable1_virt[2048+i] = kern_ptl2 + 256*i;
|
|
ttable1[2048+i] = (uint32_t)(kern_ptl2+256*i) | 0x1;
|
|
}
|
|
ttable1_virt[4095] = (uint32_t*)ttable2_earlyio;
|
|
ttable1[4095] = (uint32_t)(ttable2_earlyio) | 0x1;
|
|
ttable1_virt[3072] = (uint32_t*)ttable2_ptmap;
|
|
ttable1[3072] = (uint32_t)(ttable2_ptmap) | 0x1;
|
|
|
|
for(unsigned i = 0;i < numl2ent;++i) {
|
|
kern_ptl2[i] = (0x80000000+0x1000*i) | 0x576;
|
|
}
|
|
|
|
ttable2_earlyio[255].baseadr = 0x49020000>>12;
|
|
ttable2_earlyio[255].one = 1;
|
|
ttable2_earlyio[255].ap2 = 0;
|
|
ttable2_earlyio[255].ap = 0x3;
|
|
ttable2_earlyio[255].tex = 0;
|
|
ttable2_earlyio[255].c = 0;
|
|
ttable2_earlyio[255].b = 1;
|
|
}
|
|
|
|
// static ptentry_section_t ttable1[4096] __attribute__((aligned(16384)));
|
|
|
|
// //int test[1048576] __attribute__((__used__));
|
|
|
|
// static void _init_ttable() noexcept {
|
|
// // Create 1:1 translation table for entire address space with appropriate memory types
|
|
// memset((void*)ttable1, 0, 16384);
|
|
|
|
// // 0x00000000..0x7fffffff MMIO (Non-cacheable)
|
|
// for(int i = 0;i < 2048;++i) {
|
|
// ttable1[i].baseadr = i;
|
|
// ttable1[i].one = 1;
|
|
|
|
// // Read/write at any privilege
|
|
// ttable1[i].ap2 = 0;
|
|
// ttable1[i].ap = 0x3;
|
|
// // Shareable device
|
|
// ttable1[i].tex = 0;
|
|
// ttable1[i].c = 0;
|
|
// ttable1[1].b = 1;
|
|
// };
|
|
|
|
// // 0x80000000..0x8fffffff RAM (Cacheable)
|
|
// for(int i = 2048;i < 2304;++i) {
|
|
// ttable1[i].baseadr = i;
|
|
// ttable1[i].one = 1;
|
|
|
|
// // Read/write at any privilege
|
|
// ttable1[i].ap2 = 0;
|
|
// ttable1[i].ap = 0x3;
|
|
// // Cacheable
|
|
// ttable1[i].tex = 0x5;
|
|
// ttable1[i].c = 0;
|
|
// ttable1[i].b = 1;
|
|
// ttable1[i].s = 1;
|
|
// };
|
|
|
|
// // 0x90000000..0xffffffff ??? (Non-cacheable)
|
|
// for(int i = 2304;i < 4095;++i) {
|
|
// ttable1[i].baseadr = i;
|
|
// ttable1[i].one = 1;
|
|
|
|
// // Read/write at any privilege
|
|
// ttable1[i].ap2 = 0;
|
|
// ttable1[i].ap = 0x3;
|
|
// // Shareable device
|
|
// ttable1[i].tex = 0;
|
|
// ttable1[i].c = 0;
|
|
// ttable1[1].b = 1;
|
|
// };
|
|
|
|
// }
|
|
|
|
void cortexa8::init_mmu() noexcept {
|
|
_init_ttable();
|
|
|
|
// Set Translation Table Base Register 1
|
|
uint32_t reg = ((uint32_t)ttable1);
|
|
reg |= 0xb;
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 1"
|
|
: : [val] "r"(reg));
|
|
|
|
// Set Translation Table Base Control Register
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 2"
|
|
: : [val] "r"(0x01));
|
|
|
|
// Set domains register
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c3, c0, 0"
|
|
: : [val] "r"(0x1));
|
|
|
|
// Flush L1 page table from L1$
|
|
for(int i = 0;i < 256;++i) {
|
|
reg = ((uint32_t)ttable1)+i*64;
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
|
|
: : [val] "r"(reg));
|
|
}
|
|
// Flush L2 page tables from L1$
|
|
for(unsigned j = 0;j < numl2pt;++j)
|
|
for(int i = 0;i < 16;++i) {
|
|
reg = ((uint32_t)kern_ptl2)+j*1024+i*64;
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
|
|
: : [val] "r"(reg));
|
|
}
|
|
for(int i = 0;i < 16;++i) {
|
|
reg = ((uint32_t)ttable2_earlyio)+i*64;
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
|
|
: : [val] "r"(reg));
|
|
}
|
|
for(int i = 0;i < 16;++i) {
|
|
reg = ((uint32_t)ttable2_ptmap)+i*64;
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
|
|
: : [val] "r"(reg));
|
|
}
|
|
__asm__ __volatile__ ("dsb");
|
|
|
|
// Invalidate TLBs
|
|
__asm__ __volatile__ ("mcr 15, 0, r0, c8, c5, 0 ; mcr 15, 0, r0, c8, c6, 0; isb");
|
|
|
|
// Enable MMU
|
|
__asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; orr %[reg], %[reg], #0x1; mcr 15, 0, %[reg], c1, c0, 0; isb"
|
|
: [reg] "=r"(reg));
|
|
|
|
_mmu_is_init = true;
|
|
}
|
|
|
|
// Write one second-level entry: slot `l2` of the table installed at
// first-level slot `l1` receives `phys` combined with the attribute
// bits in `mode`. The table for `l1` must already be installed.
static void _map_page(unsigned l1, unsigned l2, uintptr_t phys, uint32_t mode) {
  uint32_t volatile *const table = ttable1_virt[l1];
  table[l2] = phys | mode;
}
|
|
|
|
static bool recursing = false;
|
|
|
|
void cortexa8::map_pages(uintptr_t virt, uintptr_t phys, unsigned count) {
|
|
assert(_mmu_is_init);
|
|
// Check alignment
|
|
assert((virt&0xfff) == 0);
|
|
assert((phys&0xfff) == 0);
|
|
|
|
unsigned virt_l1 = virt/1048576, virt_l2 = (virt%1048576)/4096;
|
|
unsigned virt_end_l1 = (virt+count*4096-1)/1048576, virt_end_l2 = ((virt+count*4096-1)%1048576)/4096;
|
|
if(virt_l1 != virt_end_l1) { // Allocation spans multiple L2 pagetables
|
|
assert(!recursing);
|
|
for(unsigned i = virt_l1;i <= virt_end_l1;++i) {
|
|
unsigned cur_l2 = (virt%1048576)/4096;
|
|
unsigned pages_on_i = (cur_l2+count>256)?(256-cur_l2):count;
|
|
recursing = true;
|
|
map_pages(virt, phys, pages_on_i);
|
|
count -= pages_on_i;
|
|
virt += pages_on_i*4096;
|
|
phys += pages_on_i*4096;
|
|
}
|
|
recursing = false;
|
|
}
|
|
|
|
bool hw = (virt>=io_start);
|
|
|
|
// Check if L2 pagetable for area exists
|
|
if(ttable1_virt[virt_l1] == 0) {
|
|
// Allocate and build new L2 pt
|
|
if(ptmap_area_free == ptmap_area_top) {
|
|
// Expand L2 pagetable map area
|
|
assert(false && "NYI");
|
|
}
|
|
|
|
// Allocate memory
|
|
uintptr_t newpt_p = phys_mm::alloc(1);
|
|
// Map to pt map area
|
|
uintptr_t newpt_v = 0xc0000000 + ptmap_area_free++*4096;
|
|
map_pages(newpt_v, newpt_p, 1);
|
|
|
|
// Install new L2 page table
|
|
memset((void*)newpt_v, 0, 4096);
|
|
// There are 4 L2 page tables on a page
|
|
for(int i = 0;i < 4;++i) {
|
|
ttable1_virt[virt_l1+i] = (uint32_t*)(newpt_v+1024*i);
|
|
ttable1[virt_l1+i] = (newpt_p+1024*i) | 0x1;
|
|
}
|
|
// Flush L1 pt entry from L1D$
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1; dsb"
|
|
: : [val] "r"(ttable1+virt_l1));
|
|
if(((uintptr_t)(ttable1+virt_l1)%64) != ((uintptr_t)(ttable1+virt_l1+16)%64))
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1; dsb"
|
|
: : [val] "r"(ttable1+virt_l1+16));
|
|
}
|
|
|
|
// Map in L2 pt
|
|
for(unsigned i = virt_l2;i <= virt_end_l2;++i) {
|
|
_map_page(virt_l1, i, phys, hw?0x036:0x576);
|
|
phys += 4096;
|
|
}
|
|
|
|
// Flush L2 pt from L1D$
|
|
unsigned l2_cache_s = virt_l2/16, l2_cache_e = virt_end_l2/16 + (virt_end_l2%16==0)?0:1;
|
|
for(unsigned i = l2_cache_s;i <= l2_cache_e;++i) {
|
|
uint32_t reg = ((uint32_t)ttable1_virt[virt_l1])+i*64;
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
|
|
: : [val] "r"(reg));
|
|
}
|
|
__asm__ __volatile__ ("dsb");
|
|
|
|
// Invalidate TLBs
|
|
for(uintptr_t va = virt; va < virt+4096*count;va += 4096)
|
|
__asm__ __volatile__("mcr 15, 0, %[mva], c8, c7, 1"
|
|
: : [mva] "r"(va));
|
|
|
|
__asm__ __volatile__ ("isb");
|
|
}
|
|
|
|
void cortexa8::unmap_pages(uintptr_t virt, unsigned count) {
|
|
assert(_mmu_is_init);
|
|
// Check alignment
|
|
assert((virt&0xfff) == 0);
|
|
|
|
unsigned virt_l1 = virt/1048576, virt_l2 = (virt%1048576)/4096;
|
|
unsigned virt_end_l1 = (virt+count*4096-1)/1048576, virt_end_l2 = ((virt+count*4096-1)%1048576)/4096;
|
|
|
|
// Clear L2 pagetable entries
|
|
for(unsigned l1 = virt_l1;l1 <= virt_end_l1;++l1) {
|
|
assert(ttable1_virt[l1] != nullptr);
|
|
for(unsigned l2 = (l1==virt_l1)?virt_l2:0;l2 <= (l1==virt_end_l1)?virt_end_l2:255;++l2) {
|
|
ttable1_virt[l1][l2] = 0;
|
|
}
|
|
}
|
|
|
|
// Flush L2 pagetables from L1D$
|
|
for(unsigned l1 = virt_l1;l1 <= virt_end_l1;++l1) {
|
|
for(unsigned cl = (l1==virt_l1)?virt_l2/16:0;
|
|
cl <= (l1==virt_end_l1)?(virt_end_l2/16+((virt_end_l2%16==0)?0:1)):15;
|
|
++cl) {
|
|
__asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
|
|
: : [val] "r"(ttable1_virt[l1]+cl*16));
|
|
}
|
|
}
|
|
__asm__ __volatile__ ("dsb");
|
|
|
|
// Invalidate TLBs
|
|
for(uintptr_t va = virt; va < virt+4096*count;va += 4096)
|
|
__asm__ __volatile__("mcr 15, 0, %[mva], c8, c7, 1"
|
|
: : [mva] "r"(va));
|
|
|
|
__asm__ __volatile__ ("isb");
|
|
}
|
|
|
|
|
|
extern uint32_t __stack_excp;
|
|
extern uint32_t __stack_int;
|
|
void cortexa8::init_handlers() noexcept {
|
|
// Set stack pointers
|
|
cortexa8_set_und_sp(&__stack_excp);
|
|
cortexa8_set_abt_sp(&__stack_excp);
|
|
cortexa8_set_irq_sp(&__stack_int);
|
|
cortexa8_set_fiq_sp(&__stack_int);
|
|
|
|
// Set VBAR
|
|
__asm__ __volatile__("mcr 15, 0, %[reg], c12, c0, 0"
|
|
: : [reg] "r"(&_vect_table));
|
|
|
|
|
|
}
|
|
|
|
// Default interrupt / exception handlers
|
|
extern "C" {
|
|
|
|
void _cortexa8_excp_data_abt() __attribute__((interrupt ("ABORT")));
|
|
void _cortexa8_excp_pf_abt() __attribute__((interrupt ("ABORT")));
|
|
void _cortexa8_excp_undef() __attribute__((interrupt ("UNDEF")));
|
|
void _cortexa8_syscall() __attribute__((interrupt ("SWI")));
|
|
void _cortexa8_unhandled_fiq() __attribute__((interrupt ("FIQ")));
|
|
|
|
}
|
|
|
|
// Data abort handler: report the faulting instruction address, the
// fault address (CP15 c6, c0, 0) and the fault status decoded from the
// fault status register (CP15 c5, c0, 0), then halt.
void _cortexa8_excp_data_abt() {
  uint32_t lr, dfar, dfsr;
  // volatile: the asm reads lr without naming it as an operand, so it
  // must not be scheduled after a call that overwrites lr
  __asm__ __volatile__ ("mov %[lr], lr; mrc 15, 0, %[dfsr], c5, c0, 0; mrc 15, 0, %[dfar], c6, c0, 0"
                        : [lr] "=r"(lr), [dfsr] "=r"(dfsr), [dfar] "=r"(dfar));

  // The fault status is the five-bit field FS: FS[3:0] in bits 3:0 and
  // FS[4] in bit 10 of the register
  unsigned long status = (dfsr&0xf) | ((dfsr>>6)&0x10);

  printf("ERROR: Data abort\n");
  // lr points 8 bytes past the aborted instruction
  printf("PC: %.8lx Fault Address: %.8lx Fault code: %.2lx\n",
         (unsigned long)(lr-8), (unsigned long)dfar, status);
  while(1) {}
}
|
|
|
|
// Prefetch abort handler: report the faulting instruction address, the
// fault address (CP15 c6, c0, 2) and the fault status decoded from the
// instruction fault status register (CP15 c5, c0, 1), then halt.
void _cortexa8_excp_pf_abt() {
  uint32_t lr, ifar, ifsr;
  // volatile: the asm reads lr without naming it as an operand, so it
  // must not be scheduled after a call that overwrites lr
  __asm__ __volatile__ ("mov %[lr], lr; mrc 15, 0, %[ifsr], c5, c0, 1; mrc 15, 0, %[ifar], c6, c0, 2"
                        : [lr] "=r"(lr), [ifsr] "=r"(ifsr), [ifar] "=r"(ifar));

  // The fault status is the five-bit field FS: FS[3:0] in bits 3:0 and
  // FS[4] in bit 10 of the register
  unsigned long status = (ifsr&0xf) | ((ifsr>>6)&0x10);

  printf("ERROR: Prefetch abort\n");
  // lr points 4 bytes past the aborted instruction
  printf("PC: %.8lx Fault Address: %.8lx Fault code: %.2lx\n",
         (unsigned long)(lr-4), (unsigned long)ifar, status);
  while(1) {}
}
|
|
|
|
// Undefined instruction handler: report the address of the offending
// instruction, then halt.
void _cortexa8_excp_undef() {
  uint32_t lr, spsr;
  // volatile: the asm reads lr without naming it as an operand, so it
  // must stay ahead of the calls below, which may overwrite lr
  __asm__ __volatile__ ("mov %[lr], lr"
                        : [lr] "=r"(lr));
  spsr = cortexa8_get_spsr();

  // lr points just past the instruction: 2 bytes when SPSR bit 5
  // (0x20) is set, 4 bytes otherwise
  uint32_t insn_len = (spsr&0x20)?2:4;

  printf("ERROR: Undefined instruction\n");
  printf("PC: %.8lx\n",
         (unsigned long)(lr-insn_len));
  while(1) {}
}
|
|
|
|
// Supervisor call handler. System calls are not implemented yet: the
// handler only prints a notice and returns.
void _cortexa8_syscall()
{
  printf("Syscall NYI\n");
}
|
|
|
|
// Default FIQ handler: prints a notice and returns without servicing
// the interrupt source.
void _cortexa8_unhandled_fiq()
{
  printf("UNHANDLED FINTERRUPT\n");
}
|