diff --git a/Makefile b/Makefile index b1989f0..c491ba8 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,10 @@ OBJCOPY=arm-none-eabi-objcopy OBJDUMP=arm-none-eabi-objdump #CFLAGS=-ffreestanding -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp -mfpu=neon --std=gnu99 -ggdb -Wall -Wextra -pedantic -Wno-unused-parameter -Og -flto -CXXFLAGS=-ffreestanding -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp -mfpu=neon --std=c++11 -ggdb -Wall -Wextra -pedantic -Wno-unused-parameter -fstrict-enums -Wabi -Og -flto +CXXFLAGS=-ffreestanding -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp -mfpu=neon --std=c++11 -ggdb -Wall -Wextra -pedantic -Wno-unused-parameter -fno-rtti -fstrict-enums -Wabi -Og -flto LDFLAGS=-static -C_SRCS=drv_omap35x_gpt.c omap35x_intc.c omap35x_prcm.c syscall.c -CXX_SRCS=cortexa8.cc main.cc omap35x.cc uart.cc +C_SRCS=syscall.c +CXX_SRCS=cortexa8.cc drv_omap35x_gpt.cc main.cc mm.cc omap35x.cc omap35x_intc.cc omap35x_prcm.cc phys_mm.cc uart.cc S_SRCS=cortexa8_asm.s syscall_asm.s OBJS=$(addprefix objs/,$(C_SRCS:.c=.o)) $(addprefix objs/,$(CXX_SRCS:.cc=.o)) $(addprefix objs/,$(S_SRCS:.s=.o)) @@ -29,7 +29,7 @@ beagle-nand.bin: fw.img ./bb_nandflash_ecc $@ 0x0 0xe80000 || true qemu: beagle-nand.bin - qemu-system-arm -M beagle -m 256M -mtdblock beagle-nand.bin -nographic + qemu-system-arm -M beagle -m 256M -mtdblock beagle-nand.bin -nographic -s objs/%.o: %.c $(CXX) $(CXXFLAGS) -c -MMD -MP -o $@ $< diff --git a/cortexa8.cc b/cortexa8.cc index 6a93124..92bb834 100644 --- a/cortexa8.cc +++ b/cortexa8.cc @@ -3,6 +3,7 @@ #include #include +#include "phys_mm.hh" #include "cortexa8.hh" extern int _vect_table; @@ -65,7 +66,7 @@ void cortexa8::disable_dcache() noexcept { : [reg] "=r"(reg)); } -typedef struct { +struct ptentry_section_t { unsigned :1; unsigned one:1; unsigned b:1; @@ -81,78 +82,179 @@ typedef struct { unsigned :1; unsigned ns:1; unsigned baseadr:12; -} ptentry_section_t; +}; -static ptentry_section_t ttable1[4096] __attribute__((aligned(16384))); 
+struct ptentry_ptl2_t { + unsigned one:1; + unsigned :2; + unsigned ns:1; + unsigned :1; + unsigned domain:4; + unsigned :1; + unsigned ptadr:22; +}; + +struct ptl2entry_page_t { + unsigned xn:1; + unsigned one:1; + unsigned b:1; + unsigned c:1; + unsigned ap:2; + unsigned tex:3; + unsigned ap2:1; + unsigned s:1; + unsigned ng:1; + unsigned baseadr:20; +}; + +// Import symbols from linker +extern uint32_t _kernel_ptl1; +extern uint32_t _kernel_ptl2; +extern uint32_t _kernel_real_numl2pt; +extern uint32_t _kernel_numl2ent; +extern uint32_t __io_start; + +// Fix some variables +static const uintptr_t io_start = (uintptr_t)&__io_start; +static const unsigned numl2pt = (unsigned)&_kernel_real_numl2pt; +static const unsigned numl2ent = (unsigned)&_kernel_numl2ent; + +static volatile uint32_t *const ttable1 = &_kernel_ptl1; +static volatile uint32_t *const kern_ptl2 = &_kernel_ptl2; +static uint32_t volatile* ttable1_virt[4096]; + +// Map console UART I/O region statically, for debug output while +// initializing memory management +static volatile ptl2entry_page_t ttable2_earlyio[256] __attribute__((aligned(1024))); + +// Map first section of pagetable map area +static volatile ptl2entry_page_t ttable2_ptmap[256] __attribute__((aligned(1024))); + +bool _mmu_is_init = false; +static unsigned ptmap_area_top = 256, ptmap_area_free = 0; static void _init_ttable() noexcept { - // Create 1:1 translation table for entire address space with appropriate memory types memset((void*)ttable1, 0, 16384); - // 0x00000000..0x7fffffff MMIO (Non-cacheable) - for(int i = 0;i < 2048;++i) { - ttable1[i].baseadr = i; - ttable1[i].one = 1; + memset((void*)kern_ptl2, 0, 1024*numl2pt); + memset((void*)ttable2_earlyio, 0, 1024); + memset((void*)ttable2_ptmap, 0, 1024); - // Read/write at any privilege - ttable1[i].ap2 = 0; - ttable1[i].ap = 0x3; - // Shareable device - ttable1[i].tex = 0; - ttable1[i].c = 0; - ttable1[1].b = 1; - }; + for(unsigned i = 0;i < numl2pt;++i) { + ttable1_virt[2048+i] 
= kern_ptl2 + 256*i; + ttable1[2048+i] = (uint32_t)(kern_ptl2+256*i) | 0x1; + } + ttable1_virt[4095] = (uint32_t*)ttable2_earlyio; + ttable1[4095] = (uint32_t)(ttable2_earlyio) | 0x1; + ttable1_virt[3072] = (uint32_t*)ttable2_ptmap; + ttable1[3072] = (uint32_t)(ttable2_ptmap) | 0x1; - // 0x80000000..0x8fffffff RAM (Cacheable) - for(int i = 2048;i < 2304;++i) { - ttable1[i].baseadr = i; - ttable1[i].one = 1; + for(unsigned i = 0;i < numl2ent;++i) { + kern_ptl2[i] = (0x80000000+0x1000*i) | 0x576; + } - // Read/write at any privilege - ttable1[i].ap2 = 0; - ttable1[i].ap = 0x3; - // Cacheable - ttable1[i].tex = 0x5; - ttable1[i].c = 0; - ttable1[i].b = 1; - ttable1[i].s = 1; - }; - - // 0x90000000..0xffffffff ??? (Non-cacheable) - for(int i = 2304;i < 4095;++i) { - ttable1[i].baseadr = i; - ttable1[i].one = 1; - - // Read/write at any privilege - ttable1[i].ap2 = 0; - ttable1[i].ap = 0x3; - // Shareable device - ttable1[i].tex = 0; - ttable1[i].c = 0; - ttable1[1].b = 1; - }; - + ttable2_earlyio[255].baseadr = 0x49020000>>12; + ttable2_earlyio[255].one = 1; + ttable2_earlyio[255].ap2 = 0; + ttable2_earlyio[255].ap = 0x3; + ttable2_earlyio[255].tex = 0; + ttable2_earlyio[255].c = 0; + ttable2_earlyio[255].b = 1; } +// static ptentry_section_t ttable1[4096] __attribute__((aligned(16384))); + +// //int test[1048576] __attribute__((__used__)); + +// static void _init_ttable() noexcept { +// // Create 1:1 translation table for entire address space with appropriate memory types +// memset((void*)ttable1, 0, 16384); + +// // 0x00000000..0x7fffffff MMIO (Non-cacheable) +// for(int i = 0;i < 2048;++i) { +// ttable1[i].baseadr = i; +// ttable1[i].one = 1; + +// // Read/write at any privilege +// ttable1[i].ap2 = 0; +// ttable1[i].ap = 0x3; +// // Shareable device +// ttable1[i].tex = 0; +// ttable1[i].c = 0; +// ttable1[1].b = 1; +// }; + +// // 0x80000000..0x8fffffff RAM (Cacheable) +// for(int i = 2048;i < 2304;++i) { +// ttable1[i].baseadr = i; +// ttable1[i].one = 1; + +// 
// Read/write at any privilege +// ttable1[i].ap2 = 0; +// ttable1[i].ap = 0x3; +// // Cacheable +// ttable1[i].tex = 0x5; +// ttable1[i].c = 0; +// ttable1[i].b = 1; +// ttable1[i].s = 1; +// }; + +// // 0x90000000..0xffffffff ??? (Non-cacheable) +// for(int i = 2304;i < 4095;++i) { +// ttable1[i].baseadr = i; +// ttable1[i].one = 1; + +// // Read/write at any privilege +// ttable1[i].ap2 = 0; +// ttable1[i].ap = 0x3; +// // Shareable device +// ttable1[i].tex = 0; +// ttable1[i].c = 0; +// ttable1[1].b = 1; +// }; + +// } + void cortexa8::init_mmu() noexcept { _init_ttable(); - // Set Translation Table base Ptr 0 - uint32_t reg = ((uint32_t)ttable1) & 0xffffc000u; + // Set Translation Table Base Register 1 + uint32_t reg = ((uint32_t)ttable1); reg |= 0xb; - __asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 0" + __asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 1" : : [val] "r"(reg)); + // Set Translation Table Base Control Register + __asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 2" + : : [val] "r"(0x01)); + // Set domains register __asm__ __volatile__ ("mcr 15, 0, %[val], c3, c0, 0" : : [val] "r"(0x1)); - // Flush trans. 
table from L1$ + // Flush L1 page table from L1$ for(int i = 0;i < 256;++i) { reg = ((uint32_t)ttable1)+i*64; __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" : : [val] "r"(reg)); } + // Flush L2 page tables from L1$ + for(unsigned j = 0;j < numl2pt;++j) + for(int i = 0;i < 16;++i) { + reg = ((uint32_t)kern_ptl2)+j*1024+i*64; + __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" + : : [val] "r"(reg)); + } + for(int i = 0;i < 16;++i) { + reg = ((uint32_t)ttable2_earlyio)+i*64; + __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" + : : [val] "r"(reg)); + } + for(int i = 0;i < 16;++i) { + reg = ((uint32_t)ttable2_ptmap)+i*64; + __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" + : : [val] "r"(reg)); + } __asm__ __volatile__ ("dsb"); // Invalidate TLBs @@ -161,8 +263,133 @@ void cortexa8::init_mmu() noexcept { // Enable MMU __asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; orr %[reg], %[reg], #0x1; mcr 15, 0, %[reg], c1, c0, 0; isb" : [reg] "=r"(reg)); + + _mmu_is_init = true; } +static void _map_page(unsigned l1, unsigned l2, uintptr_t phys, uint32_t mode) { + ttable1_virt[l1][l2] = phys | mode; +} + +static bool recursing = false; + +// Map 'count' 4KiB pages starting at the page-aligned virtual address 'virt' +// to consecutive physical pages starting at the page-aligned address 'phys'. +// A range crossing a 1MiB boundary is mapped one L2 page table at a time. +void cortexa8::map_pages(uintptr_t virt, uintptr_t phys, unsigned count) { + assert(_mmu_is_init); + // Check alignment + assert((virt&0xfff) == 0); + assert((phys&0xfff) == 0); + + unsigned virt_l1 = virt/1048576, virt_l2 = (virt%1048576)/4096; + unsigned virt_end_l1 = (virt+count*4096-1)/1048576, virt_end_l2 = ((virt+count*4096-1)%1048576)/4096; + if(virt_l1 != virt_end_l1) { // Allocation spans multiple L2 pagetables + assert(!recursing); + for(unsigned i = virt_l1;i <= virt_end_l1;++i) { + unsigned cur_l2 = (virt%1048576)/4096; + unsigned pages_on_i = (cur_l2+count>256)?(256-cur_l2):count; + recursing = true; + map_pages(virt, phys, pages_on_i); + count -= pages_on_i; + virt += pages_on_i*4096; + phys += pages_on_i*4096; + } + recursing = false; + // Done: falling through would re-map the first L2 table with the advanced 'phys' + return; + } + + bool hw = (virt>=io_start); + + // Check if L2 pagetable for 
area exists + if(ttable1_virt[virt_l1] == 0) { + // Allocate and build new L2 pt + if(ptmap_area_free == ptmap_area_top) { + // Expand L2 pagetable map area + assert(false && "NYI"); + } + + // Allocate memory + uintptr_t newpt_p = phys_mm::alloc(1); + // Map to pt map area + uintptr_t newpt_v = 0xc0000000 + ptmap_area_free++*4096; + map_pages(newpt_v, newpt_p, 1); + + // Install new L2 page table + memset((void*)newpt_v, 0, 4096); + // There are 4 L2 page tables on a page + for(int i = 0;i < 4;++i) { + ttable1_virt[virt_l1+i] = (uint32_t*)(newpt_v+1024*i); + ttable1[virt_l1+i] = (newpt_p+1024*i) | 0x1; + } + // Flush L1 pt entry from L1D$ + __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1; dsb" + : : [val] "r"(ttable1+virt_l1)); + if(((uintptr_t)(ttable1+virt_l1)%64) != ((uintptr_t)(ttable1+virt_l1+16)%64)) + __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1; dsb" + : : [val] "r"(ttable1+virt_l1+16)); + } + + // Map in L2 pt + for(unsigned i = virt_l2;i <= virt_end_l2;++i) { + _map_page(virt_l1, i, phys, hw?0x036:0x576); + phys += 4096; + } + + // Flush L2 pt from L1D$ (16 entries per 64-byte cache line, inclusive line range) + unsigned l2_cache_s = virt_l2/16, l2_cache_e = virt_end_l2/16; + for(unsigned i = l2_cache_s;i <= l2_cache_e;++i) { + uint32_t reg = ((uint32_t)ttable1_virt[virt_l1])+i*64; + __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" + : : [val] "r"(reg)); + } + __asm__ __volatile__ ("dsb"); + + // Invalidate TLBs + for(uintptr_t va = virt; va < virt+4096*count;va += 4096) + __asm__ __volatile__("mcr 15, 0, %[mva], c8, c7, 1" + : : [mva] "r"(va)); + + __asm__ __volatile__ ("isb"); +} + +void cortexa8::unmap_pages(uintptr_t virt, unsigned count) { + assert(_mmu_is_init); + // Check alignment + assert((virt&0xfff) == 0); + + unsigned virt_l1 = virt/1048576, virt_l2 = (virt%1048576)/4096; + unsigned virt_end_l1 = (virt+count*4096-1)/1048576, virt_end_l2 = ((virt+count*4096-1)%1048576)/4096; + + // Clear L2 pagetable entries + for(unsigned l1 = virt_l1;l1 <= 
virt_end_l1;++l1) { + assert(ttable1_virt[l1] != nullptr); + for(unsigned l2 = (l1==virt_l1)?virt_l2:0;l2 <= ((l1==virt_end_l1)?virt_end_l2:255);++l2) { // '<=' binds tighter than '?:', so the bound must be parenthesised + ttable1_virt[l1][l2] = 0; + } + } + + // Flush L2 pagetables from L1D$ (16 entries per 64-byte cache line, bounds are inclusive) + for(unsigned l1 = virt_l1;l1 <= virt_end_l1;++l1) { + for(unsigned cl = (l1==virt_l1)?virt_l2/16:0; + cl <= ((l1==virt_end_l1)?(virt_end_l2/16):15); + ++cl) { + __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" + : : [val] "r"(ttable1_virt[l1]+cl*16)); + } + } + __asm__ __volatile__ ("dsb"); + + // Invalidate TLBs + for(uintptr_t va = virt; va < virt+4096*count;va += 4096) + __asm__ __volatile__("mcr 15, 0, %[mva], c8, c7, 1" + : : [mva] "r"(va)); + + __asm__ __volatile__ ("isb"); +} + + extern uint32_t __stack_excp; extern uint32_t __stack_int; void cortexa8::init_handlers() noexcept { diff --git a/cortexa8.hh b/cortexa8.hh index a88acd7..598a3b7 100644 --- a/cortexa8.hh +++ b/cortexa8.hh @@ -22,6 +22,8 @@ namespace cortexa8 { void init_handlers() noexcept; + void map_pages(uintptr_t virt, uintptr_t phys, unsigned count); + void unmap_pages(uintptr_t virt, unsigned count); } // Implemented in cortexa8_asm.s diff --git a/drv_omap35x_gpt.cc b/drv_omap35x_gpt.cc index 0f6978b..d576d8b 100644 --- a/drv_omap35x_gpt.cc +++ b/drv_omap35x_gpt.cc @@ -3,9 +3,10 @@ #include #include -#include "drv_omap35x_gpt.hh" #include "omap35x_intc.hh" #include "util.hh" +#include "mmio.hh" +#include "drv_omap35x_gpt.hh" #define TIOCP_CFG 4 #define TISR 6 @@ -52,28 +53,28 @@ private: r_tisr() = 0x2; } - uint32_t volatile& r_tidr() {return _reg32(base_, 0x0); } - uint32_t volatile& r_tiocp_cfg() {return _reg32(base_, 0x10); } - uint32_t volatile& r_tistat() {return _reg32(base_, 0x14); } - uint32_t volatile& r_tisr() {return _reg32(base_, 0x18); } - uint32_t volatile& r_tier() {return _reg32(base_, 0x1c); } - uint32_t volatile& r_twer() {return _reg32(base_, 0x20); } - uint32_t volatile& r_tclr() {return _reg32(base_, 0x24); } - uint32_t 
volatile& r_tcrr() {return _reg32(base_, 0x28); } - uint32_t volatile& r_tldr() {return _reg32(base_, 0x2c); } - uint32_t volatile& r_ttgr() {return _reg32(base_, 0x30); } - uint32_t volatile& r_twps() {return _reg32(base_, 0x34); } - uint32_t volatile& r_tmar() {return _reg32(base_, 0x38); } - uint32_t volatile& r_tcar1() {return _reg32(base_, 0x3c); } - uint32_t volatile& r_tsicr() {return _reg32(base_, 0x40); } - uint32_t volatile& r_tcar2() {return _reg32(base_, 0x44); } - uint32_t volatile& r_tpir() {return _reg32(base_, 0x48); } - uint32_t volatile& r_tnir() {return _reg32(base_, 0x4c); } - uint32_t volatile& r_tcvr() {return _reg32(base_, 0x50); } - uint32_t volatile& r_tocr() {return _reg32(base_, 0x54); } - uint32_t volatile& r_towr() {return _reg32(base_, 0x58); } + uint32_t volatile& r_tidr() {return _reg32(base_.get_virt(), 0x0); } + uint32_t volatile& r_tiocp_cfg() {return _reg32(base_.get_virt(), 0x10); } + uint32_t volatile& r_tistat() {return _reg32(base_.get_virt(), 0x14); } + uint32_t volatile& r_tisr() {return _reg32(base_.get_virt(), 0x18); } + uint32_t volatile& r_tier() {return _reg32(base_.get_virt(), 0x1c); } + uint32_t volatile& r_twer() {return _reg32(base_.get_virt(), 0x20); } + uint32_t volatile& r_tclr() {return _reg32(base_.get_virt(), 0x24); } + uint32_t volatile& r_tcrr() {return _reg32(base_.get_virt(), 0x28); } + uint32_t volatile& r_tldr() {return _reg32(base_.get_virt(), 0x2c); } + uint32_t volatile& r_ttgr() {return _reg32(base_.get_virt(), 0x30); } + uint32_t volatile& r_twps() {return _reg32(base_.get_virt(), 0x34); } + uint32_t volatile& r_tmar() {return _reg32(base_.get_virt(), 0x38); } + uint32_t volatile& r_tcar1() {return _reg32(base_.get_virt(), 0x3c); } + uint32_t volatile& r_tsicr() {return _reg32(base_.get_virt(), 0x40); } + uint32_t volatile& r_tcar2() {return _reg32(base_.get_virt(), 0x44); } + uint32_t volatile& r_tpir() {return _reg32(base_.get_virt(), 0x48); } + uint32_t volatile& r_tnir() {return 
_reg32(base_.get_virt(), 0x4c); } + uint32_t volatile& r_tcvr() {return _reg32(base_.get_virt(), 0x50); } + uint32_t volatile& r_tocr() {return _reg32(base_.get_virt(), 0x54); } + uint32_t volatile& r_towr() {return _reg32(base_.get_virt(), 0x58); } - uintptr_t base_; + MMIO_alloc base_; int irq_; int_handler_t handler_; }; diff --git a/exceptions.hh b/exceptions.hh new file mode 100644 index 0000000..923b027 --- /dev/null +++ b/exceptions.hh @@ -0,0 +1,8 @@ +#ifndef _EXCEPTIONS_HH_ +#define _EXCEPTIONS_HH_ + +namespace ex { + class bad_alloc{}; +} + +#endif diff --git a/fw_cxx.ld b/fw_cxx.ld index b9fbb46..1f85796 100644 --- a/fw_cxx.ld +++ b/fw_cxx.ld @@ -182,27 +182,50 @@ SECTIONS . = ALIGN(. != 0 ? 32 / 8 : 1); } _bss_end__ = . ; __bss_end__ = . ; - . = ALIGN(32 / 8); - . = ALIGN(32 / 8); + + . = ALIGN(64); + . = . + 0x4000; /* 16KiB exception stack */ + __stack_excp = .; + . = . + 0x4000; /* 16KiB interrupt stack */ + __stack_int = .; + . = . + 0x4000; /* 16KiB kernel startup stack */ + __stack = .; + __end__ = . ; _end = .; PROVIDE (end = .); - . = ALIGN(64); - .stack : - { - . = . + 0x200000; /* 2MiB stack should be enough... */ - __stack = .; + /* L1 page table must be aligned at 16KiB boundary */ + . = ALIGN(16384); + _kernel_ptl1 = .; + . = . + 0x4000; /* Allocate 16KiB for L1 PT */ + _kernel_ptl2 = .; + /* Calculate the number of L2 PTs to cover the kernel image + bss */ + _kernel_numl2pt = ((_end - 0x80000000) / 1048576) + (((_end - 0x80000000) % 1048576)==0?0:1); + /* Allocate 1KiB for each L2 PT */ + . = . + (0x400 * _kernel_numl2pt); + _kernel_pt_end = .; + /* If the page tables cross a L1 PT entry boundary (1MiB), signal to the + initialization code that an additional L1 entry and L2 page table must + be created for the memory containing the page tables */ + _kernel_additional_pt = ((_kernel_pt_end & 0xfff00000) == (_kernel_ptl1 & 0xfff00000))?0:1; + _kernel_real_numl2pt = _kernel_numl2pt + _kernel_additional_pt; + . = . 
+ (0x400 * _kernel_additional_pt); + _kernel_real_pt_end = .; + _kernel_numl2ent = ((_kernel_real_pt_end - 0x80000000) / 4096) + (((_kernel_real_pt_end - 0x80000000) % 4096)==0?0:1); - *(.stack) - } - . = ALIGN(54); - . = . + 0x10000; /* 64KiB exception stack */ - __stack_excp = .; - . = . + 0x10000; /* 64KiB interrupt stack */ - __stack_int = .; __heap_start = .; /* for _sbrk */ - /* Rest until end of RAM is heap */ - __heap_end = 0x90000000; + /* Virtual address space from end of kernel to 0xbfffffff is heap */ + __heap_end = 0xc0000000; + + /* Virtual address space for kernel scratchpad */ + __scratch_start = 0xc0000000; + /* Map: + 0xc0000000 - 0xc0800000 Kernel L2 pagetables + */ + __scratch_end = 0xe0000000; + /* Virtual address space for MMIO */ + __io_start = 0xe0000000; + __io_end = 0xfffff000; /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) } } diff --git a/main.cc b/main.cc index ae4b7c9..351f78c 100644 --- a/main.cc +++ b/main.cc @@ -8,42 +8,63 @@ #include "omap35x.hh" #include "omap35x_intc.hh" #include "omap35x_prcm.hh" +#include "phys_mm.hh" +#include "mm.hh" #include "cortexa8.hh" #include "uart.hh" -static volatile uint32_t *const prcm_wkst_per = (uint32_t*)0x483070b0; - static volatile uint32_t tickctr = 0; +static OMAP35x_prcm* _prcm = nullptr; void tickfunc() noexcept { ++tickctr; - *prcm_wkst_per = (1<<3); // Clear GPT2 wake bit + + _prcm->clear_wake_per(3); } void setConsole(ICharacterDevice* newConsole); int main(int argc, char* argv[]) { - // Enable caches - cortexa8::enable_icache(); + // Initialize memory cortexa8::enable_dcache(); + cortexa8::enable_icache(); + cortexa8::init_mmu(); + + // Enable early console + EarlyUART earlyUART{}; + setConsole(&earlyUART); + + // Install handlers + cortexa8::init_handlers(); + + // Initialize physical memory management + phys_mm::init(); + + // Initialize kernel dynamic memory management + mm::init(); + // From here on, malloc/free and new/delete may be used + + phys_mm::print_state(); 
// Configure PRCM OMAP35x_prcm prcm {0x48004000, 0x48306000}; + _prcm = &prcm; + + //while(1) {__asm__ __volatile__ ("wfi"); } // Configure interrrupt & exception handling - cortexa8::init_handlers(); OMAP35x_intc intc {0x48200000}; - cortexa8::init_mmu(); - prcm.enable_peripherals(); - UART consoleUART {0x49020000, 74}; + UART consoleUART {0x49020000, 74, prcm}; setConsole(&consoleUART); // Enable interrupts cortexa8_ena_int(); + + OMAP35x_Info chipInfo{0x48002000, 0x4830a000}; - omap35x_print_chip_id(); + chipInfo.print_chip_id(); printf("5\n"); @@ -63,6 +84,9 @@ int main(int argc, char* argv[]) { break; } + malloc_stats(); + phys_mm::print_state(); + while(1) { __asm__ __volatile__ ("wfi"); if(tickctr%100 == 0) { @@ -70,9 +94,7 @@ int main(int argc, char* argv[]) { fflush(stdout); } } - - - malloc_stats(); + return 0; } diff --git a/mm.cc b/mm.cc new file mode 100644 index 0000000..da44db8 --- /dev/null +++ b/mm.cc @@ -0,0 +1,60 @@ +#include +#include + +#include "cortexa8.hh" +#include "phys_mm.hh" +#include "util.hh" +#include "mm.hh" + +extern uint32_t __scratch_start, __scratch_end, __io_start, __io_end, __heap_start, __heap_end; + +static const uintptr_t scratch_start = (uintptr_t)&__scratch_start; +static const uintptr_t scratch_end = (uintptr_t)&__scratch_end; +static const uintptr_t io_start = (uintptr_t)&__io_start; +static const uintptr_t io_end = (uintptr_t)&__io_end; +static const uintptr_t heap_start = (uintptr_t)&__heap_start; +static const uintptr_t heap_end = (uintptr_t)&__heap_end; + +static uintptr_t heap_top, io_top; + +void mm::init() { + heap_top = phys_mm::get_end_of_kernel_alloc(); + + /* Map unused area of kernel image RAM to heap + The space between the end of the kernel and the end of + the power-of-2 block allocated for it by physical allocater is + lost otherwise */ + uintptr_t heap_start_align = ((heap_start&0xfff)==0)?heap_start:((heap_start&~0xfff)+4096); + cortexa8::map_pages(heap_start_align, heap_start_align, 
(heap_top-heap_start)/4096); + + io_top = io_start; +} + +uintptr_t mm::virtalloc_io(unsigned pages) { + if(io_top+0x1000*pages >= io_end) + throw bad_alloc{}; + + uintptr_t ret = io_top; + io_top += 0x1000*pages; + + return ret; +} + +uintptr_t mm::grow_heap(unsigned pages) { + // Allocations are done in powers to 2, so round up pages to next power of 2 + pages = _pow2(_ln2(pages)); + + if(heap_top+0x1000*pages >= heap_end) + throw bad_alloc{}; + + uintptr_t newphys = phys_mm::alloc(pages); + + cortexa8::map_pages(heap_top, newphys, pages); + + heap_top += 0x1000*pages; + return heap_top; +} + +uintptr_t mm::get_heap_end() { + return heap_top; +} diff --git a/mm.hh b/mm.hh new file mode 100644 index 0000000..ea372e2 --- /dev/null +++ b/mm.hh @@ -0,0 +1,26 @@ +#ifndef _MM_HH_ +#define _MM_HH_ + +#include + +#include "exceptions.hh" + +namespace mm { + // Initialize memory management + // Physical memory management must be initialized before calling mm::init + void init(); + + // Allocate 'pages' pages of virtual address space in the I/O region + uintptr_t virtalloc_io(unsigned pages); + + // Grow the kernel heap by 'pages' pages + // Allocate and map the desired amount of memory + // Return the new heap end + uintptr_t grow_heap(unsigned pages); + + uintptr_t get_heap_end(); + + class bad_alloc : public ex::bad_alloc {}; +} + +#endif diff --git a/mmio.hh b/mmio.hh new file mode 100644 index 0000000..1680362 --- /dev/null +++ b/mmio.hh @@ -0,0 +1,32 @@ +#ifndef _MMIO_HH_ +#define _MMIO_HH_ + +#include + +#include "mm.hh" +#include "cortexa8.hh" + +class MMIO_alloc { +public: + MMIO_alloc(uintptr_t base_p, unsigned size = 1) : base_p_{base_p}, base_v_{mm::virtalloc_io(size)}, size_{size} { + cortexa8::map_pages(base_v_, base_p_, size_); + } + + ~MMIO_alloc() { + cortexa8::unmap_pages(base_v_, size_); + } + + uintptr_t const& get_virt() const noexcept { + return base_v_; + } + + uintptr_t const& get_phys() const noexcept { + return base_p_; + } + +private: + uintptr_t 
base_p_, base_v_; + unsigned size_; +}; + +#endif diff --git a/omap35x.cc b/omap35x.cc index 5105183..7056c09 100644 --- a/omap35x.cc +++ b/omap35x.cc @@ -1,53 +1,83 @@ #include #include +#include +#include "util.hh" +#include "mmio.hh" #include "omap35x.hh" using std::printf; -static volatile uint32_t *const omap35x_omap_sr = (uint32_t*)0x4800244c; // 1 word -static volatile uint32_t *const omap35x_idcode = (uint32_t*)0x4830a204; // 1 dword -static volatile uint32_t *const omap35x_die_id = (uint32_t*)0x4830a218; // 4 dwords -static volatile uint32_t *const omap35x_skuid = (uint32_t*)0x4830a20c; // 1 dword +static const char *const omap_names[5] = {"OMAP3530", "OMAP3525", "OMAP3515", "OMAP3503", "UNKNOWN"}; +static const char *const omap_ver[8] = {"ES 1.0", "ES 2.0", "ES 2.1", "ES 3.0", "ES 3.1", "UNKNOWN", "UNKNOWN", "ES 3.1.2"}; -static const char *const omap_names[] = {"OMAP3530", "OMAP3525", "OMAP3515", "OMAP3503", "UNKNOWN"}; -static const char *const omap_ver[] = {"ES 1.0", "ES 2.0", "ES 2.1", "ES 3.0", "ES 3.1", "UNKNOWN", "UNKNOWN", "ES 3.1.2"}; - -void omap35x_print_chip_id() { - uint16_t omapsr = *omap35x_omap_sr&0xffffu; - int omapsr_idx; - switch(omapsr) { - case 0x0c00: - omapsr_idx = 0; - break; - case 0x4c00: - omapsr_idx = 1; - break; - case 0x1c00: - omapsr_idx = 2; - break; - case 0x5c00: - omapsr_idx = 3; - break; - default: - printf("Warning: Unknown OMAP35x type (%.8lx)\n", *omap35x_omap_sr); - omapsr_idx = 4; - break; +class OMAP35x_Info_impl { +public: + OMAP35x_Info_impl(uintptr_t scm_base, uintptr_t control_base) : scm_base_{scm_base}, control_base_{control_base} { } - uint32_t idcode = *omap35x_idcode; - int id_idx = (idcode&0xf0000000u)>>28; - if(id_idx > 7) // Versions 8..15 are unknown - id_idx = 6; - if(id_idx == 5 || id_idx == 6) - printf("Warning: Unknown OMAP35x version (%.8lx)\n", idcode); + ~OMAP35x_Info_impl() { + } - bool highfreq = false; - if((*omap35x_skuid&0xf) == 0x8) - highfreq = true; + void print_chip_id() { + uint16_t 
omapsr = r_omap_sr()&0xffffu; + int omapsr_idx; + switch(omapsr) { + case 0x0c00: + omapsr_idx = 0; + break; + case 0x4c00: + omapsr_idx = 1; + break; + case 0x1c00: + omapsr_idx = 2; + break; + case 0x5c00: + omapsr_idx = 3; + break; + default: + printf("Warning: Unknown OMAP35x type (%.8lx)\n", r_omap_sr()); + omapsr_idx = 4; + break; + } - printf("%s %s %s Serial# %.8lx%.8lx%.8lx%.8lx\n", - omap_names[omapsr_idx], omap_ver[id_idx], - highfreq?"720 MHz":"600 MHz", - omap35x_die_id[3], omap35x_die_id[2], omap35x_die_id[1], omap35x_die_id[0]); + uint32_t idcode = r_idcode(); + int id_idx = (idcode&0xf0000000u)>>28; + if(id_idx > 7) // Versions 8..15 are unknown + id_idx = 6; + if(id_idx == 5 || id_idx == 6) + printf("Warning: Unknown OMAP35x version (%.8lx)\n", idcode); + + bool highfreq = false; + if((r_skuid()&0xf) == 0x8) + highfreq = true; + + printf("%s %s %s Serial# %.8lx%.8lx%.8lx%.8lx\n", + omap_names[omapsr_idx], omap_ver[id_idx], + highfreq?"720 MHz":"600 MHz", + r_die_id(3), r_die_id(2), r_die_id(1), r_die_id(0)); + } + + std::array get_serial() { + return std::array{r_die_id(3), r_die_id(2), r_die_id(1), r_die_id(0)}; + } + +private: + MMIO_alloc scm_base_, control_base_; + + uint32_t volatile& r_omap_sr() { return _reg32(scm_base_.get_virt(), 0x44c); } + uint32_t volatile& r_idcode() { return _reg32(control_base_.get_virt(), 0x204); } + uint32_t volatile& r_die_id(int n) { assert(n >= 0 && n <= 3); return _reg32(control_base_.get_virt(), 0x218+0x4*n); } + uint32_t volatile& r_skuid() { return _reg32(control_base_.get_virt(), 0x20c); } +}; + + +OMAP35x_Info::OMAP35x_Info(uintptr_t scm_base, uintptr_t control_base) : impl_{new OMAP35x_Info_impl{scm_base, control_base}} { +} + +OMAP35x_Info::~OMAP35x_Info() { +} + +void OMAP35x_Info::print_chip_id() { + impl_->print_chip_id(); } diff --git a/omap35x.hh b/omap35x.hh index 8b13fd3..d30ccee 100644 --- a/omap35x.hh +++ b/omap35x.hh @@ -1,6 +1,19 @@ #ifndef _OMAP35X_HH_ #define _OMAP35X_HH_ -void 
omap35x_print_chip_id(); +#include + +class OMAP35x_Info_impl; + +class OMAP35x_Info { +public: + OMAP35x_Info(uintptr_t scm_base, uintptr_t control_base); + ~OMAP35x_Info(); + + void print_chip_id(); + +private: + std::unique_ptr impl_; +}; #endif diff --git a/omap35x_intc.cc b/omap35x_intc.cc index b3f80db..523ddd4 100644 --- a/omap35x_intc.cc +++ b/omap35x_intc.cc @@ -3,9 +3,10 @@ #include #include -#include "omap35x_intc.hh" #include "cortexa8.hh" +#include "mmio.hh" #include "util.hh" +#include "omap35x_intc.hh" extern "C" void _omap35x_intc_handler() __attribute__((interrupt ("IRQ"))); @@ -72,27 +73,27 @@ private: __asm__ __volatile__ ("dsb"); } - uint32_t volatile& r_sysconfig() {return _reg32(base_, 0x10); } - uint32_t volatile& r_sysstatus() {return _reg32(base_, 0x14); } - uint32_t volatile& r_sir_irq() {return _reg32(base_, 0x40); } - uint32_t volatile& r_sir_fiq() {return _reg32(base_, 0x44); } - uint32_t volatile& r_control() {return _reg32(base_, 0x48); } - uint32_t volatile& r_protection() {return _reg32(base_, 0x4c); } - uint32_t volatile& r_idle() {return _reg32(base_, 0x50); } - uint32_t volatile& r_irq_priority() {return _reg32(base_, 0x60); } - uint32_t volatile& r_fiq_priority() {return _reg32(base_, 0x64); } - uint32_t volatile& r_threshold() {return _reg32(base_, 0x68); } - uint32_t volatile& r_itr(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x80+0x20*n); } - uint32_t volatile& r_mir(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x84+0x20*n); } - uint32_t volatile& r_mir_clear(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x88+0x20*n); } - uint32_t volatile& r_mir_set(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x8c+0x20*n); } - uint32_t volatile& r_isr_set(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x90+0x20*n); } - uint32_t volatile& r_isr_clear(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x94+0x20*n); } - uint32_t volatile& r_pending_irq(int n) {assert(n >= 0 && n <= 2); return 
_reg32(base_, 0x98+0x20*n); } - uint32_t volatile& r_pending_fiq(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x9c+0x20*n); } - uint32_t volatile& r_ilr(int m) {assert(m >= 0 && m <= 95); return _reg32(base_, 0x100+0x4*m); } + uint32_t volatile& r_sysconfig() {return _reg32(base_.get_virt(), 0x10); } + uint32_t volatile& r_sysstatus() {return _reg32(base_.get_virt(), 0x14); } + uint32_t volatile& r_sir_irq() {return _reg32(base_.get_virt(), 0x40); } + uint32_t volatile& r_sir_fiq() {return _reg32(base_.get_virt(), 0x44); } + uint32_t volatile& r_control() {return _reg32(base_.get_virt(), 0x48); } + uint32_t volatile& r_protection() {return _reg32(base_.get_virt(), 0x4c); } + uint32_t volatile& r_idle() {return _reg32(base_.get_virt(), 0x50); } + uint32_t volatile& r_irq_priority() {return _reg32(base_.get_virt(), 0x60); } + uint32_t volatile& r_fiq_priority() {return _reg32(base_.get_virt(), 0x64); } + uint32_t volatile& r_threshold() {return _reg32(base_.get_virt(), 0x68); } + uint32_t volatile& r_itr(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x80+0x20*n); } + uint32_t volatile& r_mir(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x84+0x20*n); } + uint32_t volatile& r_mir_clear(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x88+0x20*n); } + uint32_t volatile& r_mir_set(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x8c+0x20*n); } + uint32_t volatile& r_isr_set(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x90+0x20*n); } + uint32_t volatile& r_isr_clear(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x94+0x20*n); } + uint32_t volatile& r_pending_irq(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x98+0x20*n); } + uint32_t volatile& r_pending_fiq(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x9c+0x20*n); } + uint32_t volatile& r_ilr(int m) {assert(m >= 0 && m <= 95); return 
_reg32(base_.get_virt(), 0x100+0x4*m); } - uintptr_t base_; + MMIO_alloc base_; std::array handler_tbl_; }; diff --git a/omap35x_prcm.cc b/omap35x_prcm.cc index 7ac5e78..6e3f7dd 100644 --- a/omap35x_prcm.cc +++ b/omap35x_prcm.cc @@ -1,13 +1,14 @@ #include #include - -#include "omap35x_prcm.hh" +#include #include "util.hh" +#include "mmio.hh" +#include "omap35x_prcm.hh" class OMAP35x_prcm_impl { public: - OMAP35x_prcm_impl(uintptr_t cm_base, uintptr_t pm_base) :cm_base_(cm_base), pm_base_(pm_base) { + OMAP35x_prcm_impl(uintptr_t cm_base, uintptr_t pm_base) :cm_base_{cm_base, 28}, pm_base_{pm_base, 42} { // Setup IVA2 domain (unused, disable) r_clkstctrl_iva2() = 0x3; r_wkdep_iva2() = 0; @@ -64,188 +65,194 @@ public: r_wken_per() |= (1<<11); // UART3 wake up enable } + void clear_wake_per(int n) { + assert(n >= 0 && n <= 31); + r_wkst_per() = (1<enable_peripherals(); } + +void OMAP35x_prcm::clear_wake_per(int n) { + impl_->clear_wake_per(n); +} diff --git a/omap35x_prcm.hh b/omap35x_prcm.hh index 9f6fd9a..ea58ee7 100644 --- a/omap35x_prcm.hh +++ b/omap35x_prcm.hh @@ -12,6 +12,8 @@ public: // Enable clock&power for all used peripherals void enable_peripherals(); + + void clear_wake_per(int n); private: std::unique_ptr impl_; diff --git a/phys_mm.cc b/phys_mm.cc new file mode 100644 index 0000000..47e59bd --- /dev/null +++ b/phys_mm.cc @@ -0,0 +1,293 @@ +#include +#include +#include +#include + +#include "util.hh" +#include "phys_mm.hh" + +static constexpr unsigned phys_pages = 65536; // for 256 MiB RAM +static constexpr unsigned size_steps = 17; // log2(256Mi)-log2(4Ki)+1 + +using idx_t = uint16_t; // Must have a range of 0..phys_pages-1 + +struct pb_t { + idx_t next_free_by_size; + uint8_t size_ln2; + unsigned used:1; + unsigned nbs_valid:1; +} __attribute__((packed)); + +static pb_t phys_blocks[phys_pages]; + +static unsigned size_starts[size_steps]; + +// Import symbols from linker +extern uint32_t _kernel_real_pt_end; + +static unsigned _idx(pb_t const& pb) 
__attribute__((const)); +static unsigned _idx(pb_t const& pb) { + return &pb - phys_blocks; +} + +static pb_t& _next_by_pos(pb_t const& pb) __attribute__((pure)); +static pb_t& _next_by_pos(pb_t const& pb) { + auto idx = _idx(pb); + assert(idx+_pow2(pb.size_ln2) < phys_pages); + + return phys_blocks[idx+_pow2(pb.size_ln2)]; +} + +static bool _is_last_pos(pb_t const& pb) __attribute__((pure)); +static bool _is_last_pos(pb_t const& pb) { + auto idx = _idx(pb); + return (idx+_pow2(pb.size_ln2) >= phys_pages); +} + +// // Caution: Slow, performs linear search +// static pb_t& _prev_by_pos(pb_t const& pb) __attribute__((pure)); +// static pb_t& _prev_by_pos(pb_t const& pb) { +// auto idx = _idx(pb); +// assert(idx != 0); + +// unsigned i = 0, prev_i; +// while(i < idx) { +// prev_i = i; +// i = i + _pow2(phys_blocks[i].size_ln2); +// } +// assert(i == idx); +// return phys_blocks[prev_i]; +// } + +static pb_t& _next_by_size(pb_t const& pb) __attribute__((pure)); +static pb_t& _next_by_size(pb_t const& pb) { + assert(pb.nbs_valid); + + return phys_blocks[pb.next_free_by_size]; +} + + +static void _ll_insert(unsigned& head, pb_t& elem) { + if(head != phys_pages) { + elem.next_free_by_size = head; + elem.nbs_valid = true; + } else + elem.nbs_valid = false; + + head = _idx(elem); +} + +static void _ll_remove(unsigned& head, pb_t& elem) { + assert(head != phys_pages); + + // Remove from head is fast + if(head == _idx(elem)) { + if(elem.nbs_valid) + head = elem.next_free_by_size; + else + head = phys_pages; + elem.nbs_valid = false; + } else { + // Search for previous element + pb_t* it = phys_blocks+head; + while(&_next_by_size(*it) != &elem) { + it = &_next_by_size(*it); + } + + // Remove + it->next_free_by_size = elem.next_free_by_size; + it->nbs_valid = elem.nbs_valid; + elem.nbs_valid = false; + } +} + +// Returns true if block is the left half of the next larger +// allocation block, false if it is the right half +static bool _is_left(pb_t& block) __attribute__((pure)); 
+static bool _is_left(pb_t& block) { + auto idx = _idx(block); + + return !(idx & _pow2(block.size_ln2)); +} + +// Gets the left half of the allocation block of which +// block is the right half. Only valid if _is_left(block) +// would be false +static pb_t& _get_left(pb_t& block) __attribute__((pure)); +static pb_t& _get_left(pb_t& block) { + return phys_blocks[_idx(block)-_pow2(block.size_ln2)]; +} + +static void _split(pb_t& block) { + assert(block.size_ln2 > 0); + assert(!block.used); + + // Remove from free list for old size + _ll_remove(size_starts[block.size_ln2], block); + + // Split + --block.size_ln2; + auto& next = _next_by_pos(block); + next.size_ln2 = block.size_ln2; + next.used = false; + + // Add to free list for new size + _ll_insert(size_starts[block.size_ln2], block); + _ll_insert(size_starts[block.size_ln2], next); +} + +static void _set_used(pb_t& block) { + _ll_remove(size_starts[block.size_ln2], block); + + block.used = true; +} + +static void _alloc_at(unsigned idx, unsigned size) { + auto& block = phys_blocks[idx]; + + assert(block.size_ln2 >= size); + assert(!block.used); + + while(block.size_ln2 > size) + _split(block); + + _set_used(block); +} + +static unsigned _find_free(unsigned size) { + for(unsigned i = size;i < size_steps;++i) { + if(size_starts[i] != phys_pages) + return size_starts[i]; + } + + return phys_pages; +} + +// Recursively merge block if possible +static pb_t& _merge(pb_t& block) { + if(block.size_ln2 == size_steps-1) + return block; + + if(_is_left(block) && !_next_by_pos(block).used && + (_next_by_pos(block).size_ln2 == block.size_ln2)) { + // Remove from free list for old size + _ll_remove(size_starts[block.size_ln2], block); + _ll_remove(size_starts[block.size_ln2], _next_by_pos(block)); + + // Merge + ++block.size_ln2; + + // Add to free list for new size + _ll_insert(size_starts[block.size_ln2], block); + + return _merge(block); + } else if (!_is_left(block) && !_get_left(block).used && + (_get_left(block).size_ln2 
== block.size_ln2)) { + // Remove from free list for old size + _ll_remove(size_starts[block.size_ln2], block); + _ll_remove(size_starts[block.size_ln2], _get_left(block)); + + // Merge + auto& left = _get_left(block); + ++left.size_ln2; + + // Add to free list for new size + _ll_insert(size_starts[left.size_ln2], left); + + return _merge(left); + } + + return block; +} + + +static void _free_at(unsigned idx) { + auto& block = phys_blocks[idx]; + assert(block.used); + + block.used = false; + _ll_insert(size_starts[block.size_ln2], block); + + // Check merge + _merge(block); +} + +static void _print_elem(pb_t const& block) { + auto idx = _idx(block); + assert(idx < phys_pages); + + printf(" {%u: ", idx); + if(block.nbs_valid) + printf("%u ", block.next_free_by_size); + else + printf("- "); + printf("%u%s}", block.size_ln2, block.used?" U":""); +} + +void phys_mm::init() { + for(unsigned i = 0;i < size_steps-1;++i) + size_starts[i] = phys_pages; + size_starts[size_steps-1] = 0; + + phys_blocks[0].size_ln2 = size_steps-1; + phys_blocks[0].used = false; + phys_blocks[0].nbs_valid = false; + + uintptr_t kernel_size = (uintptr_t)&_kernel_real_pt_end - 0x80000000; + unsigned kernel_pages = kernel_size/4096; + if(kernel_size%4096 != 0) + ++kernel_pages; + + _alloc_at(0, _ln2(kernel_pages)); +} + +uintptr_t phys_mm::alloc(unsigned count) { + unsigned i = _find_free(_ln2(count)); + if(i == phys_pages) + throw bad_alloc{}; + + _alloc_at(i, _ln2(count)); + + return 0x80000000+4096*i; +} + +void phys_mm::free(uintptr_t base) { + unsigned idx = (base-0x80000000)/4096; + _free_at(idx); +} + +void phys_mm::print_state() { + printf("Free lists:\n"); + for(unsigned i = 0;i < size_steps;++i) { + printf("\t%u:", i); + if(size_starts[i] != phys_pages) { + pb_t* it = phys_blocks+size_starts[i]; + while(true) { + _print_elem(*it); + if(!it->nbs_valid) + break; + + it = &_next_by_size(*it); + } + } + printf("\n"); + } + + printf("Blocks:\n"); + pb_t *it = phys_blocks; + while (true) { + 
_print_elem(*it); + + if(_is_last_pos(*it)) + break; + + it = &_next_by_pos(*it); + } + + printf("\n\n"); +} + +uintptr_t phys_mm::get_end_of_kernel_alloc() { + pb_t& kern = phys_blocks[0]; + return 0x80000000+4096*_idx(_next_by_pos(kern)); +} diff --git a/phys_mm.hh b/phys_mm.hh new file mode 100644 index 0000000..70b4df1 --- /dev/null +++ b/phys_mm.hh @@ -0,0 +1,31 @@ +#ifndef _PHYS_MM_HH_ +#define _PHYS_MM_HH_ + +#include + +#include "exceptions.hh" + +namespace phys_mm { + /* Initialize the physical memory management + Initializes internal data structures and + sets the RAM containing the kernel image to used */ + void init(); + + // Allocate 'count' consecutive pages of physical memory + // Optionally aligned to 'align' pages + uintptr_t alloc(unsigned count); + + // Free 'count' consecutive pages of physical memory starting at 'base' + void free(uintptr_t base); + + void print_state(); + + // Returns the end of the initial physical allocation containing kernel image and data + // For use by mm::init + uintptr_t get_end_of_kernel_alloc(); + + class bad_alloc : public ex::bad_alloc {}; +} + + +#endif diff --git a/syscall.c b/syscall.c index ba84b40..2a4804b 100644 --- a/syscall.c +++ b/syscall.c @@ -1,6 +1,9 @@ #include #include +#include "cortexa8.hh" +#include "mm.hh" + #include #undef errno extern int errno; @@ -65,22 +68,33 @@ int _write(int file, const char *ptr, int len) { } } -extern char __heap_end, __heap_start; +extern uint32_t __heap_start; caddr_t _sbrk(int incr) __attribute__((used)); caddr_t _sbrk(int incr) { - static caddr_t heap_end = 0; + static uintptr_t heap_end = 0; + static uintptr_t brk; - if(heap_end == 0) - heap_end = &__heap_start; - - caddr_t prev_heap_end = heap_end; - if(heap_end + incr > &__heap_end) { - _write(1, "Out of memory\n", 14); - abort(); + if(heap_end == 0) { + heap_end = mm::get_heap_end(); + brk = (uintptr_t)&__heap_start; + } + + if(brk + incr >= heap_end) { + // Allocate additional RAM for heap + try { + unsigned 
pages = (brk+incr-heap_end+1)/4096; + if((brk+incr-heap_end+1)%4096 != 0) + ++pages; + heap_end = mm::grow_heap(pages); + } catch (ex::bad_alloc &ex) { + _write(1, "Heap allocation failure\n", 24); + abort(); + } } - heap_end += incr; - return prev_heap_end; + caddr_t prev_brk = (caddr_t)brk; + brk += incr; + return prev_brk; } int _kill(int pid, int sig) __attribute__((used)); diff --git a/uart.cc b/uart.cc index 1424367..d731a6f 100644 --- a/uart.cc +++ b/uart.cc @@ -4,12 +4,14 @@ #include "cortexa8.hh" #include "omap35x_intc.hh" -#include "uart.hh" +#include "omap35x_prcm.hh" #include "util.hh" +#include "mmio.hh" +#include "uart.hh" class UART_impl { public: - UART_impl(uintptr_t base, int irq) : base_(base), irq_(irq) { +UART_impl(uintptr_t base, int irq, OMAP35x_prcm& prcm) : base_{base}, irq_(irq), prcm_(prcm) { OMAP35x_intc::get().register_handler(irq_, std::bind(&UART_impl::recv_handler, this), 1); OMAP35x_intc::get().enable_int(irq_); @@ -105,59 +107,57 @@ private: } newdata_ = true; - *prcm_wkst_per = (1<<11); // Clear UART3 wake bit + prcm_.clear_wake_per(11); } - uintptr_t base_; + MMIO_alloc base_; int irq_; + OMAP35x_prcm& prcm_; uint8_t volatile& r_data() { - return _reg8(base_, 0); + return _reg8(base_.get_virt(), 0); } uint8_t volatile& r_dll() { - return _reg8(base_, 0); + return _reg8(base_.get_virt(), 0); } uint8_t volatile& r_dlh() { - return _reg8(base_, 4); + return _reg8(base_.get_virt(), 4); } uint8_t volatile& r_ier() { - return _reg8(base_, 4); + return _reg8(base_.get_virt(), 4); } uint8_t volatile& r_fcr() { - return _reg8(base_, 8); + return _reg8(base_.get_virt(), 8); } uint8_t volatile& r_efr() { - return _reg8(base_, 8); + return _reg8(base_.get_virt(), 8); } uint8_t volatile& r_lcr() { - return _reg8(base_, 0xc); + return _reg8(base_.get_virt(), 0xc); } uint8_t volatile& r_lsr() { - return _reg8(base_, 0x14); + return _reg8(base_.get_virt(), 0x14); } uint8_t volatile& r_ssr() { - return _reg8(base_, 0x44); + return 
_reg8(base_.get_virt(), 0x44); } uint8_t volatile& r_sysc() { - return _reg8(base_, 0x54); + return _reg8(base_.get_virt(), 0x54); } uint8_t volatile& r_wer() { - return _reg8(base_, 0x5c); + return _reg8(base_.get_virt(), 0x5c); } - volatile uint32_t *const prcm_wkst_per = (uint32_t*)0x483070b0; - - static const size_t RECVBUFFERSIZE = 128; std::array recvbuffer_; volatile size_t recvbuffer_rdptr_ = (RECVBUFFERSIZE-1), recvbuffer_wrptr_ = 0; @@ -184,7 +184,7 @@ private: } }; -UART::UART(uintptr_t base, int irq) : impl_(new UART_impl(base, irq)) { +UART::UART(uintptr_t base, int irq, OMAP35x_prcm& prcm) : impl_(new UART_impl(base, irq, prcm)) { } UART::~UART() { @@ -197,3 +197,48 @@ void UART::write(const char *data, int const& len) { int UART::read(char *buf, int const& len) { return impl_->read(buf, len); } + + +void EarlyUART::write(char const* data, int const& len) { + for(int i = 0;i < len;++i) + sendb(*data++); +} + +int EarlyUART::read(char *buf, int const& len) { + char rd = r_data(); + if(rd == '\r') + rd = '\n'; + sendb(rd); + buf[0] = rd; + return 1; +} + +uint8_t volatile& EarlyUART::r_data() { + return _reg8(base_, 0); +} + +uint8_t volatile& EarlyUART::r_lsr() { + return _reg8(base_, 0x14); +} + +uint8_t volatile& EarlyUART::r_ssr() { + return _reg8(base_, 0x44); +} + +void EarlyUART::_wait_txnotfull() { + while(r_ssr() & 0x1) {} +} + +void EarlyUART::_wait_rxnotempty() { + while(!(r_lsr() & 0x1)) {} +} + +void EarlyUART::sendb(char b) { + _wait_txnotfull(); + r_data() = b; +} + +char EarlyUART::recvb() { + _wait_rxnotempty(); + return r_data(); +} diff --git a/uart.hh b/uart.hh index 1b3135f..12c64dd 100644 --- a/uart.hh +++ b/uart.hh @@ -4,6 +4,8 @@ #include #include +class OMAP35x_prcm; + class ICharacterDevice { public: virtual void write(char const* data, int const& len) = 0; @@ -14,7 +16,7 @@ class UART_impl; class UART : public ICharacterDevice { public: - UART(uintptr_t base, int irq); + UART(uintptr_t base, int irq, OMAP35x_prcm& prcm); 
~UART(); virtual void write(char const* data, int const& len); @@ -25,4 +27,25 @@ private: std::unique_ptr impl_; }; +class EarlyUART : public ICharacterDevice { +public: + EarlyUART() {} + ~EarlyUART() {} + + virtual void write(char const* data, int const& len); + virtual int read(char *buf, int const& len); + +private: + void _wait_txnotfull(); + void _wait_rxnotempty(); + void sendb(char b); + char recvb(); + + uint8_t volatile& r_data(); + uint8_t volatile& r_lsr(); + uint8_t volatile& r_ssr(); + + static const uintptr_t base_ = 0xfffff000; +}; + #endif diff --git a/util.hh b/util.hh index 8bb04c6..3f5bf77 100644 --- a/util.hh +++ b/util.hh @@ -2,6 +2,7 @@ #define _UTIL_HH_ #include +#include // Functions to access hardware registers. GCC will generate memory access instructions of the correct width. // Usage: "_reg8(base, ofs) = 0xf0;" to set @@ -22,4 +23,21 @@ constexpr inline uint32_t volatile& _reg32(uintptr_t const& base, size_t const& return *reinterpret_cast(base+ofs); } +// log2(n) +inline unsigned _ln2(unsigned n) noexcept __attribute__((const)); +inline unsigned _ln2(unsigned n) noexcept { + uint32_t reg; + asm ("clz %[dst], %[src]" + : [dst] "=r"(reg) : [src] "r"(n)); + reg = 31-reg; + if(n & ~(1<