#include <cstdint>
#include <cstring>
#include <cstdio>
#include <cassert>
#include <reent.h>
#include "phys_mm.hh"
#include "cortexa8.hh"

extern int _vect_table;

static cortexa8::thread_cb initial_thread_cb;
cortexa8::thread_cb *_current_thread_cb = &initial_thread_cb;

uint32_t cortexa8::read_cpuid(int reg) noexcept
{
    uint32_t rval;
    switch(reg) {
    case CORTEXA8_CPUID_MAINID:
        __asm__ __volatile__ ("mrc 15, 0, %[res], c0, c0, 0" : [res] "=r"(rval));
        break;
    case CORTEXA8_CPUID_CACHETYPE:
        __asm__ __volatile__ ("mrc 15, 0, %[res], c0, c0, 1" : [res] "=r"(rval));
        break;
    case CORTEXA8_CPUID_TLBTYPE:
        __asm__ __volatile__ ("mrc 15, 0, %[res], c0, c0, 3" : [res] "=r"(rval));
        break;
    case CORTEXA8_CPUID_PFR0:
        __asm__ __volatile__ ("mrc 15, 0, %[res], c0, c1, 0" : [res] "=r"(rval));
        break;
    case CORTEXA8_CPUID_PFR1:
        __asm__ __volatile__ ("mrc 15, 0, %[res], c0, c1, 1" : [res] "=r"(rval));
        break;
    default:
        rval = 0xdeadbeef;
        break;
    }
    return rval;
}

void cortexa8::init_mode()
{
    // Switch to System mode (0x1f), carrying the current SP and LR over
    __asm__ __volatile__ ("mov r0, r13; mov r1, r14; cps #0x1f; mov r13, r0; mov r14, r1"
                          : : : "r0", "r1");
    _impure_ptr = &initial_thread_cb.newlibReent;
}

void cortexa8::enable_icache() noexcept
{
    uint32_t reg;
    __asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; orr %[reg], %[reg], #0x1000; mcr 15, 0, %[reg], c1, c0, 0; isb"
                          : [reg] "=r"(reg));
}

void cortexa8::enable_dcache() noexcept
{
    uint32_t reg;
    __asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; orr %[reg], %[reg], #0x4; mcr 15, 0, %[reg], c1, c0, 0"
                          : [reg] "=r"(reg));
}

void cortexa8::disable_icache() noexcept
{
    uint32_t reg;
    __asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; bic %[reg], %[reg], #0x1000; mcr 15, 0, %[reg], c1, c0, 0; isb"
                          : [reg] "=r"(reg));
}

void cortexa8::disable_dcache() noexcept
{
    uint32_t reg;
    __asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; bic %[reg], %[reg], #0x4; mcr 15, 0, %[reg], c1, c0, 0"
                          : [reg] "=r"(reg));
}

void cortexa8::errata() noexcept
{
    // Errata 458693: Disable PLD and enable L1NEON
    __asm__ __volatile__ ("mrc 15, 0, r0, c1, c0, 1; orr r0, r0, #0x220; mcr 15, 0, r0, c1, c0, 1"
                          : : : "r0");
}

// ARMv7 short-descriptor page table entry layouts (bit-fields from LSB up)
struct ptentry_section_t {
    unsigned :1;
    unsigned one:1;
    unsigned b:1;
    unsigned c:1;
    unsigned xn:1;
    unsigned domain:4;
    unsigned :1;
    unsigned ap:2;
    unsigned tex:3;
    unsigned ap2:1;
    unsigned s:1;
    unsigned ng:1;
    unsigned :1;
    unsigned ns:1;
    unsigned baseadr:12;
};

struct ptentry_ptl2_t {
    unsigned one:1;
    unsigned :2;
    unsigned ns:1;
    unsigned :1;
    unsigned domain:4;
    unsigned :1;
    unsigned ptadr:22;
};

struct ptl2entry_page_t {
    unsigned xn:1;
    unsigned one:1;
    unsigned b:1;
    unsigned c:1;
    unsigned ap:2;
    unsigned tex:3;
    unsigned ap2:1;
    unsigned s:1;
    unsigned ng:1;
    unsigned baseadr:20;
};

// Import symbols from the linker script
extern uint32_t _kernel_ptl1;
extern uint32_t _kernel_ptl2;
extern uint32_t _kernel_real_numl2pt;
extern uint32_t _kernel_numl2ent;
extern uint32_t __io_start;

// Turn the linker symbols into usable constants and pointers
static const uintptr_t io_start = (uintptr_t)&__io_start;
static const unsigned numl2pt = (unsigned)&_kernel_real_numl2pt;
static const unsigned numl2ent = (unsigned)&_kernel_numl2ent;
static volatile uint32_t *const ttable1 = &_kernel_ptl1;
static volatile uint32_t *const kern_ptl2 = &_kernel_ptl2;
static uint32_t volatile* ttable1_virt[4096];

// Map the console UART I/O region statically, for debug output while
// initializing memory management
static volatile ptl2entry_page_t ttable2_earlyio[256] __attribute__((aligned(1024)));
// Map the first section of the pagetable map area
static volatile ptl2entry_page_t ttable2_ptmap[256] __attribute__((aligned(1024)));

bool _mmu_is_init = false;
static unsigned ptmap_area_top = 256, ptmap_area_free = 0;
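// A 4 KiB small-page mapping is located by splitting the virtual address the
// same way the table-walk hardware does: L1 index = VA / 1048576 (VA[31:20]),
// L2 index = (VA % 1048576) / 4096 (VA[19:12]).  For example (hypothetical
// address), VA 0x80402000 gives L1 index 0x804 and L2 index 2, so its
// descriptor is the third word of the L2 table installed at
// ttable1_virt[0x804].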
static void _init_ttable() noexcept
{
    memset((void*)ttable1, 0, 16384);
    memset((void*)kern_ptl2, 0, 1024*numl2pt);
    memset((void*)ttable2_earlyio, 0, 1024);
    memset((void*)ttable2_ptmap, 0, 1024);

    // Point the kernel L1 entries (VA 0x80000000 and up) at the statically
    // allocated L2 tables
    for(unsigned i = 0;i < numl2pt;++i) {
        ttable1_virt[2048+i] = kern_ptl2 + 256*i;
        ttable1[2048+i] = (uint32_t)(kern_ptl2+256*i) | 0x1;
    }
    ttable1_virt[4095] = (uint32_t*)ttable2_earlyio;
    ttable1[4095] = (uint32_t)(ttable2_earlyio) | 0x1;
    ttable1_virt[3072] = (uint32_t*)ttable2_ptmap;
    ttable1[3072] = (uint32_t)(ttable2_ptmap) | 0x1;

    // Identity-map the kernel RAM region at 0x80000000 as cacheable normal
    // memory (descriptor 0x576, decoded below)
    for(unsigned i = 0;i < numl2ent;++i) {
        kern_ptl2[i] = (0x80000000+0x1000*i) | 0x576;
    }

    // Map the console UART page at the top of the address space as
    // shareable device memory
    ttable2_earlyio[255].baseadr = 0x49020000>>12;
    ttable2_earlyio[255].one = 1;
    ttable2_earlyio[255].ap2 = 0;
    ttable2_earlyio[255].ap = 0x3;
    ttable2_earlyio[255].tex = 0;
    ttable2_earlyio[255].c = 0;
    ttable2_earlyio[255].b = 1;
}
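// Descriptor bit patterns used above and in map_pages() below, decoded per
// the ARMv7-A short-descriptor small-page format:
//   0x576: TEX=0b101, C=0, B=1 (Normal memory, inner/outer write-back
//          write-allocate), S=1 (shareable), AP=0b011 (full read/write),
//          XN=0 - used for kernel RAM pages.
//   0x036: TEX=0b000, C=0, B=1 (Shareable Device memory), AP=0b011,
//          XN=0 - used for memory-mapped I/O pages.
// The early UART entry above is built field by field through
// ptl2entry_page_t but encodes the same Device-memory attributes as 0x036.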
// Previous implementation: a flat 1:1 section map of the whole address
// space, kept for reference.
// static ptentry_section_t ttable1[4096] __attribute__((aligned(16384)));
// //int test[1048576] __attribute__((__used__));
// static void _init_ttable() noexcept {
//     // Create 1:1 translation table for entire address space with appropriate memory types
//     memset((void*)ttable1, 0, 16384);
//     // 0x00000000..0x7fffffff MMIO (Non-cacheable)
//     for(int i = 0;i < 2048;++i) {
//         ttable1[i].baseadr = i;
//         ttable1[i].one = 1;
//         // Read/write at any privilege
//         ttable1[i].ap2 = 0;
//         ttable1[i].ap = 0x3;
//         // Shareable device
//         ttable1[i].tex = 0;
//         ttable1[i].c = 0;
//         ttable1[i].b = 1;
//     };
//     // 0x80000000..0x8fffffff RAM (Cacheable)
//     for(int i = 2048;i < 2304;++i) {
//         ttable1[i].baseadr = i;
//         ttable1[i].one = 1;
//         // Read/write at any privilege
//         ttable1[i].ap2 = 0;
//         ttable1[i].ap = 0x3;
//         // Cacheable
//         ttable1[i].tex = 0x5;
//         ttable1[i].c = 0;
//         ttable1[i].b = 1;
//         ttable1[i].s = 1;
//     };
//     // 0x90000000..0xffffffff ??? (Non-cacheable)
//     for(int i = 2304;i < 4095;++i) {
//         ttable1[i].baseadr = i;
//         ttable1[i].one = 1;
//         // Read/write at any privilege
//         ttable1[i].ap2 = 0;
//         ttable1[i].ap = 0x3;
//         // Shareable device
//         ttable1[i].tex = 0;
//         ttable1[i].c = 0;
//         ttable1[i].b = 1;
//     };
// }

void cortexa8::init_mmu() noexcept
{
    _init_ttable();

    // Set Translation Table Base Register 1
    uint32_t reg = ((uint32_t)ttable1);
    reg |= 0xb;
    __asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 1" : : [val] "r"(reg));
    // Set Translation Table Base Control Register (N = 1)
    __asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 2" : : [val] "r"(0x01));
    // Set domain access control register (domain 0 = client)
    __asm__ __volatile__ ("mcr 15, 0, %[val], c3, c0, 0" : : [val] "r"(0x1));

    // Flush L1 page table from L1 D-cache so the table walker sees it
    for(int i = 0;i < 256;++i) {
        reg = ((uint32_t)ttable1)+i*64;
        __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" : : [val] "r"(reg));
    }
    // Flush L2 page tables from L1 D-cache
    for(unsigned j = 0;j < numl2pt;++j)
        for(int i = 0;i < 16;++i) {
            reg = ((uint32_t)kern_ptl2)+j*1024+i*64;
            __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" : : [val] "r"(reg));
        }
    for(int i = 0;i < 16;++i) {
        reg = ((uint32_t)ttable2_earlyio)+i*64;
        __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" : : [val] "r"(reg));
    }
    for(int i = 0;i < 16;++i) {
        reg = ((uint32_t)ttable2_ptmap)+i*64;
        __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" : : [val] "r"(reg));
    }
    __asm__ __volatile__ ("dsb");

    // Invalidate TLBs
    __asm__ __volatile__ ("mcr 15, 0, r0, c8, c5, 0; mcr 15, 0, r0, c8, c6, 0; isb");
    // Enable MMU
    __asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; orr %[reg], %[reg], #0x1; mcr 15, 0, %[reg], c1, c0, 0; isb"
                          : [reg] "=r"(reg));

    _mmu_is_init = true;
}
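// Note: with TTBCR.N = 1 (set above), virtual addresses below 0x80000000 are
// translated through TTBR0, which this code never programs, while addresses
// at or above 0x80000000 go through the TTBR1 table set up here.  That is
// why _init_ttable() only populates L1 indices 2048 and above.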
static void _map_page(unsigned l1, unsigned l2, uintptr_t phys, uint32_t mode)
{
    ttable1_virt[l1][l2] = phys | mode;
}

static bool recursing = false;

void cortexa8::map_pages(uintptr_t virt, uintptr_t phys, unsigned count)
{
    assert(_mmu_is_init);
    // Check alignment
    assert((virt&0xfff) == 0);
    assert((phys&0xfff) == 0);

    unsigned virt_l1 = virt/1048576, virt_l2 = (virt%1048576)/4096;
    unsigned virt_end_l1 = (virt+count*4096-1)/1048576,
             virt_end_l2 = ((virt+count*4096-1)%1048576)/4096;

    if(virt_l1 != virt_end_l1) {
        // Allocation spans multiple L2 pagetables: split it into per-L1-entry
        // chunks and map each chunk recursively
        assert(!recursing);
        for(unsigned i = virt_l1;i <= virt_end_l1;++i) {
            unsigned cur_l2 = (virt%1048576)/4096;
            unsigned pages_on_i = (cur_l2+count>256)?(256-cur_l2):count;
            recursing = true;
            map_pages(virt, phys, pages_on_i);
            count -= pages_on_i;
            virt += pages_on_i*4096;
            phys += pages_on_i*4096;
        }
        recursing = false;
        // Everything has been mapped by the recursive calls
        return;
    }

    bool hw = (virt>=io_start);

    // Check if an L2 pagetable for this area exists
    if(ttable1_virt[virt_l1] == 0) {
        // Allocate and build a new L2 pagetable
        if(ptmap_area_free == ptmap_area_top) {
            // Expand L2 pagetable map area
            assert(false && "NYI");
        }
        // Allocate memory
        uintptr_t newpt_p = phys_mm::alloc(1);
        // Map it into the pagetable map area
        uintptr_t newpt_v = 0xc0000000 + ptmap_area_free++*4096;
        map_pages(newpt_v, newpt_p, 1);
        // Install the new L2 page tables
        memset((void*)newpt_v, 0, 4096);
        // There are 4 L2 page tables on a page
        for(int i = 0;i < 4;++i) {
            ttable1_virt[virt_l1+i] = (uint32_t*)(newpt_v+1024*i);
            ttable1[virt_l1+i] = (newpt_p+1024*i) | 0x1;
        }
        // Flush the L1 pt entries from the L1 D-cache; if the four entries
        // straddle a cache line boundary, flush the second line as well
        __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1; dsb"
                              : : [val] "r"(ttable1+virt_l1));
        if((((uintptr_t)(ttable1+virt_l1))/64) != (((uintptr_t)(ttable1+virt_l1+3))/64))
            __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1; dsb"
                                  : : [val] "r"(ttable1+virt_l1+3));
    }

    // Fill in the L2 pagetable entries
    for(unsigned i = virt_l2;i <= virt_end_l2;++i) {
        _map_page(virt_l1, i, phys, hw?0x036:0x576);
        phys += 4096;
    }

    // Flush the touched L2 pt lines from the L1 D-cache
    unsigned l2_cache_s = virt_l2/16,
             l2_cache_e = virt_end_l2/16 + ((virt_end_l2%16==0)?0:1);
    for(unsigned i = l2_cache_s;i <= l2_cache_e;++i) {
        uint32_t reg = ((uint32_t)ttable1_virt[virt_l1])+i*64;
        __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" : : [val] "r"(reg));
    }
    __asm__ __volatile__ ("dsb");

    // Invalidate TLB entries for the newly mapped range
    for(uintptr_t va = virt; va < virt+4096*count;va += 4096)
        __asm__ __volatile__("mcr 15, 0, %[mva], c8, c7, 1" : : [mva] "r"(va));
    __asm__ __volatile__ ("isb");
}

void cortexa8::unmap_pages(uintptr_t virt, unsigned count)
{
    assert(_mmu_is_init);
    // Check alignment
    assert((virt&0xfff) == 0);

    unsigned virt_l1 = virt/1048576, virt_l2 = (virt%1048576)/4096;
    unsigned virt_end_l1 = (virt+count*4096-1)/1048576,
             virt_end_l2 = ((virt+count*4096-1)%1048576)/4096;

    // Clear L2 pagetable entries
    for(unsigned l1 = virt_l1;l1 <= virt_end_l1;++l1) {
        assert(ttable1_virt[l1] != nullptr);
        for(unsigned l2 = ((l1==virt_l1)?virt_l2:0);l2 <= ((l1==virt_end_l1)?virt_end_l2:255);++l2) {
            ttable1_virt[l1][l2] = 0;
        }
    }

    // Flush L2 pagetables from L1 D-cache
    for(unsigned l1 = virt_l1;l1 <= virt_end_l1;++l1) {
        for(unsigned cl = ((l1==virt_l1)?virt_l2/16:0);
            cl <= ((l1==virt_end_l1)?(virt_end_l2/16+((virt_end_l2%16==0)?0:1)):15);
            ++cl) {
            __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
                                  : : [val] "r"(ttable1_virt[l1]+cl*16));
        }
    }
    __asm__ __volatile__ ("dsb");

    // Invalidate TLB entries for the unmapped range
    for(uintptr_t va = virt; va < virt+4096*count;va += 4096)
        __asm__ __volatile__("mcr 15, 0, %[mva], c8, c7, 1" : : [mva] "r"(va));
    __asm__ __volatile__ ("isb");
}
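// Example (hypothetical addresses): map four pages of a peripheral whose
// registers start at physical 0x48000000 to virtual 0xd0000000, then tear
// the mapping down again:
//
//     cortexa8::map_pages(0xd0000000, 0x48000000, 4);
//     // ... access the device through 0xd0000000 ...
//     cortexa8::unmap_pages(0xd0000000, 4);
//
// Whether the pages get the Device descriptor (0x036) or the cacheable
// Normal-memory descriptor (0x576) depends only on whether the virtual
// address is at or above the __io_start linker symbol.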
extern uint32_t __stack_excp;
extern uint32_t __stack_int;

void cortexa8::init_handlers() noexcept
{
    // Set the banked stack pointers for the exception modes
    cortexa8_set_und_sp(&__stack_excp);
    cortexa8_set_abt_sp(&__stack_excp);
    cortexa8_set_irq_sp(&__stack_int);
    cortexa8_set_fiq_sp(&__stack_int);
    // Set VBAR
    __asm__ __volatile__("mcr 15, 0, %[reg], c12, c0, 0" : : [reg] "r"(&_vect_table));
}

// Default interrupt / exception handlers
extern "C" {
    void _cortexa8_excp_data_abt() __attribute__((interrupt ("ABORT")));
    void _cortexa8_excp_pf_abt() __attribute__((interrupt ("ABORT")));
    void _cortexa8_excp_undef() __attribute__((interrupt ("UNDEF")));
    void cortexa8_syscall();
    void _cortexa8_unhandled_fiq() __attribute__((interrupt ("FIQ")));
}

void _cortexa8_excp_data_abt()
{
    uint32_t lr, dfar, dfsr;
    __asm__ ("mov %[lr], lr; mrc 15, 0, %[dfsr], c5, c0, 0; mrc 15, 0, %[dfar], c6, c0, 0"
             : [lr] "=r"(lr), [dfsr] "=r"(dfsr), [dfar] "=r"(dfar));
    printf("ERROR: Data abort\n");
    printf("PC: %.8lx Fault Address: %.8lx Fault code: %lx\n", lr-4, dfar,
           ((dfsr>>7)&0x20) | ((dfsr>>6)&0x10) | (dfsr&0xf));
    while(1) { __asm__ __volatile__ ("wfi"); }
}

void _cortexa8_excp_pf_abt()
{
    uint32_t lr, ifar, ifsr;
    __asm__ ("mov %[lr], lr; mrc 15, 0, %[ifsr], c5, c0, 1; mrc 15, 0, %[ifar], c6, c0, 2"
             : [lr] "=r"(lr), [ifsr] "=r"(ifsr), [ifar] "=r"(ifar));
    printf("ERROR: Prefetch abort\n");
    printf("PC: %.8lx Fault Address: %.8lx Fault code: %lx\n", lr-4, ifar,
           ((ifsr>>7)&0x20) | ((ifsr>>6)&0x10) | (ifsr&0xf));
    while(1) { __asm__ __volatile__ ("wfi"); }
}

void _cortexa8_excp_undef()
{
    uint32_t lr, spsr;
    __asm__ ("mov %[lr], lr" : [lr] "=r"(lr));
    spsr = cortexa8_get_spsr();
    printf("ERROR: Undefined instruction\n");
    // Faulting instruction is 2 bytes back in Thumb state, 4 in ARM state
    printf("PC: %.8lx\n", lr-((spsr&0x20)?2:4));
    while(1) {}
}

void cortexa8_syscall()
{
    printf("Syscall NYI\n");
}

void _cortexa8_unhandled_fiq()
{
    printf("UNHANDLED FIQ INTERRUPT\n");
}

// Context switching stuff

cortexa8::thread_cb *cortexa8::get_cur_thread()
{
    return _current_thread_cb;
}

void cortexa8::set_cur_thread(thread_cb* tcb)
{
    _current_thread_cb = tcb;
}

void cortexa8::exit_svc()
{
    __asm__ __volatile__ ("svc 0" : : : "r0");
}

void cortexa8::yield_svc()
{
    __asm__ __volatile__ ("svc 1" : : : "r0");
}

bool cortexa8::in_handler()
{
    // CPSR mode 0x10 = User, 0x1f = System; anything else is an
    // exception or interrupt mode
    uint32_t mode = cortexa8_get_cpsr() & 0x1f;
    if((mode == 0x10) || (mode == 0x1f))
        return false;
    return true;
}
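// Example (hypothetical caller): a cooperative thread body that yields the
// CPU between work items and terminates through the exit service call:
//
//     void worker() {
//         while(more_work()) {       // more_work()/do_work() are placeholders
//             do_work();
//             cortexa8::yield_svc(); // svc #1
//         }
//         cortexa8::exit_svc();      // svc #0, does not return
//     }
//
// Note that the SVC handler in this file, cortexa8_syscall(), is still a
// stub ("Syscall NYI"); the real exit/yield services belong to the scheduler.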