diff --git a/Makefile b/Makefile
index b1989f0..c491ba8 100644
--- a/Makefile
+++ b/Makefile
@@ -5,10 +5,10 @@ OBJCOPY=arm-none-eabi-objcopy
 OBJDUMP=arm-none-eabi-objdump
 
 #CFLAGS=-ffreestanding -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp -mfpu=neon --std=gnu99 -ggdb -Wall -Wextra -pedantic -Wno-unused-parameter -Og -flto
-CXXFLAGS=-ffreestanding -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp -mfpu=neon --std=c++11 -ggdb -Wall -Wextra -pedantic -Wno-unused-parameter -fstrict-enums -Wabi -Og -flto
+CXXFLAGS=-ffreestanding -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp -mfpu=neon --std=c++11 -ggdb -Wall -Wextra -pedantic -Wno-unused-parameter -fno-rtti -fstrict-enums -Wabi -Og -flto
 LDFLAGS=-static
-C_SRCS=drv_omap35x_gpt.c omap35x_intc.c omap35x_prcm.c syscall.c
-CXX_SRCS=cortexa8.cc main.cc omap35x.cc uart.cc
+C_SRCS=syscall.c
+CXX_SRCS=cortexa8.cc drv_omap35x_gpt.cc main.cc mm.cc omap35x.cc omap35x_intc.cc omap35x_prcm.cc phys_mm.cc uart.cc
 S_SRCS=cortexa8_asm.s syscall_asm.s
 
 OBJS=$(addprefix objs/,$(C_SRCS:.c=.o)) $(addprefix objs/,$(CXX_SRCS:.cc=.o)) $(addprefix objs/,$(S_SRCS:.s=.o))
@@ -29,7 +29,7 @@ beagle-nand.bin:	fw.img
 	./bb_nandflash_ecc $@ 0x0 0xe80000 || true
 
 qemu:	beagle-nand.bin
-	qemu-system-arm -M beagle -m 256M -mtdblock beagle-nand.bin -nographic
+	qemu-system-arm -M beagle -m 256M -mtdblock beagle-nand.bin -nographic -s
 
 objs/%.o:	%.c
 	$(CXX) $(CXXFLAGS) -c -MMD -MP -o $@ $<
diff --git a/cortexa8.cc b/cortexa8.cc
index 6a93124..92bb834 100644
--- a/cortexa8.cc
+++ b/cortexa8.cc
@@ -3,6 +3,7 @@
 #include <cstdio>
 #include <cassert>
 
+#include "phys_mm.hh"
 #include "cortexa8.hh"
 
 extern int _vect_table;
@@ -65,7 +66,7 @@ void cortexa8::disable_dcache() noexcept {
 			: [reg] "=r"(reg));
 }
 
-typedef struct { 
+struct ptentry_section_t { 
   unsigned :1;
   unsigned one:1;
   unsigned b:1;
@@ -81,78 +82,179 @@ typedef struct {
   unsigned :1;
   unsigned ns:1;
   unsigned baseadr:12;
-} ptentry_section_t;
+};
 
-static ptentry_section_t ttable1[4096] __attribute__((aligned(16384)));
+struct ptentry_ptl2_t {
+  unsigned one:1;
+  unsigned :2;
+  unsigned ns:1;
+  unsigned :1;
+  unsigned domain:4;
+  unsigned :1;
+  unsigned ptadr:22;
+};
+
+struct ptl2entry_page_t {
+  unsigned xn:1;
+  unsigned one:1;
+  unsigned b:1;
+  unsigned c:1;
+  unsigned ap:2;
+  unsigned tex:3;
+  unsigned ap2:1;
+  unsigned s:1;
+  unsigned ng:1;
+  unsigned baseadr:20;
+};
+
+// Import symbols from linker
+extern uint32_t _kernel_ptl1;
+extern uint32_t _kernel_ptl2;
+extern uint32_t _kernel_real_numl2pt;
+extern uint32_t _kernel_numl2ent;
+extern uint32_t __io_start;
+
+// Fix some variables
+static const uintptr_t io_start = (uintptr_t)&__io_start;
+static const unsigned numl2pt = (unsigned)&_kernel_real_numl2pt;
+static const unsigned numl2ent = (unsigned)&_kernel_numl2ent;
+
+static volatile uint32_t *const ttable1 = &_kernel_ptl1;
+static volatile uint32_t *const kern_ptl2 = &_kernel_ptl2;
+static uint32_t volatile* ttable1_virt[4096];
+
+// Map console UART I/O region statically, for debug output while
+// initializing memory management
+static volatile ptl2entry_page_t ttable2_earlyio[256] __attribute__((aligned(1024)));
+
+// Map first section of pagetable map area
+static volatile ptl2entry_page_t ttable2_ptmap[256] __attribute__((aligned(1024)));
+
+bool _mmu_is_init = false;
+static unsigned ptmap_area_top = 256, ptmap_area_free = 0;
 
 static void _init_ttable() noexcept {
-  // Create 1:1 translation table for entire address space with appropriate memory types
   memset((void*)ttable1, 0, 16384);
 
-  // 0x00000000..0x7fffffff MMIO (Non-cacheable)
-  for(int i = 0;i < 2048;++i) {
-    ttable1[i].baseadr = i;
-    ttable1[i].one = 1;
+  memset((void*)kern_ptl2, 0, 1024*numl2pt);
+  memset((void*)ttable2_earlyio, 0, 1024);
+  memset((void*)ttable2_ptmap, 0, 1024);
 
-    // Read/write at any privilege
-    ttable1[i].ap2 = 0;
-    ttable1[i].ap = 0x3;
-    // Shareable device
-    ttable1[i].tex = 0;
-    ttable1[i].c = 0;
-    ttable1[1].b = 1;
-  };
+  for(unsigned i = 0;i < numl2pt;++i) {
+    ttable1_virt[2048+i] = kern_ptl2 + 256*i;
+    ttable1[2048+i] = (uint32_t)(kern_ptl2+256*i) | 0x1;
+  }
+  ttable1_virt[4095] = (uint32_t*)ttable2_earlyio;
+  ttable1[4095] = (uint32_t)(ttable2_earlyio) | 0x1;
+  ttable1_virt[3072] = (uint32_t*)ttable2_ptmap;
+  ttable1[3072] = (uint32_t)(ttable2_ptmap) | 0x1;
 
-  // 0x80000000..0x8fffffff RAM (Cacheable)
-  for(int i = 2048;i < 2304;++i) {
-    ttable1[i].baseadr = i;
-    ttable1[i].one = 1;
+  for(unsigned i = 0;i < numl2ent;++i) {
+    kern_ptl2[i] = (0x80000000+0x1000*i) | 0x576;
+  }
 
-    // Read/write at any privilege
-    ttable1[i].ap2 = 0;
-    ttable1[i].ap = 0x3;
-    // Cacheable
-    ttable1[i].tex = 0x5;
-    ttable1[i].c = 0;
-    ttable1[i].b = 1;
-    ttable1[i].s = 1;
-  };
-
-  // 0x90000000..0xffffffff ??? (Non-cacheable)
-  for(int i = 2304;i < 4095;++i) {
-    ttable1[i].baseadr = i;
-    ttable1[i].one = 1;
-
-    // Read/write at any privilege
-    ttable1[i].ap2 = 0;
-    ttable1[i].ap = 0x3;
-    // Shareable device
-    ttable1[i].tex = 0;
-    ttable1[i].c = 0;
-    ttable1[1].b = 1;
-  };
-    
+  ttable2_earlyio[255].baseadr = 0x49020000>>12;
+  ttable2_earlyio[255].one = 1;
+  ttable2_earlyio[255].ap2 = 0;
+  ttable2_earlyio[255].ap = 0x3;
+  ttable2_earlyio[255].tex = 0;
+  ttable2_earlyio[255].c = 0;
+  ttable2_earlyio[255].b = 1;
 }
 
+// static ptentry_section_t ttable1[4096] __attribute__((aligned(16384)));
+
+// //int test[1048576] __attribute__((__used__));
+
+// static void _init_ttable() noexcept {
+//   // Create 1:1 translation table for entire address space with appropriate memory types
+//   memset((void*)ttable1, 0, 16384);
+
+//   // 0x00000000..0x7fffffff MMIO (Non-cacheable)
+//   for(int i = 0;i < 2048;++i) {
+//     ttable1[i].baseadr = i;
+//     ttable1[i].one = 1;
+
+//     // Read/write at any privilege
+//     ttable1[i].ap2 = 0;
+//     ttable1[i].ap = 0x3;
+//     // Shareable device
+//     ttable1[i].tex = 0;
+//     ttable1[i].c = 0;
+//     ttable1[1].b = 1;
+//   };
+
+//   // 0x80000000..0x8fffffff RAM (Cacheable)
+//   for(int i = 2048;i < 2304;++i) {
+//     ttable1[i].baseadr = i;
+//     ttable1[i].one = 1;
+
+//     // Read/write at any privilege
+//     ttable1[i].ap2 = 0;
+//     ttable1[i].ap = 0x3;
+//     // Cacheable
+//     ttable1[i].tex = 0x5;
+//     ttable1[i].c = 0;
+//     ttable1[i].b = 1;
+//     ttable1[i].s = 1;
+//   };
+
+//   // 0x90000000..0xffffffff ??? (Non-cacheable)
+//   for(int i = 2304;i < 4095;++i) {
+//     ttable1[i].baseadr = i;
+//     ttable1[i].one = 1;
+
+//     // Read/write at any privilege
+//     ttable1[i].ap2 = 0;
+//     ttable1[i].ap = 0x3;
+//     // Shareable device
+//     ttable1[i].tex = 0;
+//     ttable1[i].c = 0;
+//     ttable1[1].b = 1;
+//   };
+    
+// }
+
 void cortexa8::init_mmu() noexcept {
   _init_ttable();
   
-  // Set Translation Table base Ptr 0
-  uint32_t reg = ((uint32_t)ttable1) & 0xffffc000u;
+  // Set Translation Table Base Register 1
+  uint32_t reg = ((uint32_t)ttable1);
   reg |= 0xb;
-  __asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 0"
+  __asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 1"
 			: : [val] "r"(reg));
 
+  // Set Translation Table Base Control Register
+  __asm__ __volatile__ ("mcr 15, 0, %[val], c2, c0, 2"
+			: : [val] "r"(0x01));
+
   // Set domains register
   __asm__ __volatile__ ("mcr 15, 0, %[val], c3, c0, 0"
 			: : [val] "r"(0x1));
 
-  // Flush trans. table from L1$
+  // Flush L1 page table from L1$
   for(int i = 0;i < 256;++i) {
     reg = ((uint32_t)ttable1)+i*64;
     __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
 			  : : [val] "r"(reg));
   }
+  // Flush L2 page tables from L1$
+  for(unsigned j = 0;j < numl2pt;++j)
+    for(int i = 0;i < 16;++i) {
+      reg = ((uint32_t)kern_ptl2)+j*1024+i*64;
+      __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
+			  : : [val] "r"(reg));
+    }
+  for(int i = 0;i < 16;++i) {
+    reg = ((uint32_t)ttable2_earlyio)+i*64;
+    __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
+			  : : [val] "r"(reg));
+  }
+  for(int i = 0;i < 16;++i) {
+    reg = ((uint32_t)ttable2_ptmap)+i*64;
+    __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1"
+			  : : [val] "r"(reg));
+  }
   __asm__ __volatile__ ("dsb");
 
   // Invalidate TLBs
@@ -161,8 +263,128 @@ void cortexa8::init_mmu() noexcept {
   // Enable MMU
   __asm__ __volatile__ ("mrc 15, 0, %[reg], c1, c0, 0; orr %[reg], %[reg], #0x1; mcr 15, 0, %[reg], c1, c0, 0; isb"
 			: [reg] "=r"(reg));
+
+  _mmu_is_init = true;
 }
 
+static void _map_page(unsigned l1, unsigned l2, uintptr_t phys, uint32_t mode) {
+  ttable1_virt[l1][l2] = phys | mode;
+}
+
+static bool recursing = false;
+
+// Map 'count' pages starting at page-aligned virtual address 'virt' onto
+// page-aligned physical address 'phys', creating L2 pagetables as needed
+void cortexa8::map_pages(uintptr_t virt, uintptr_t phys, unsigned count) {
+  assert(_mmu_is_init);
+  // Check alignment
+  assert((virt&0xfff) == 0);
+  assert((phys&0xfff) == 0);
+  
+  unsigned virt_l1 = virt/1048576, virt_l2 = (virt%1048576)/4096;
+  unsigned virt_end_l1 = (virt+count*4096-1)/1048576, virt_end_l2 = ((virt+count*4096-1)%1048576)/4096;
+  if(virt_l1 != virt_end_l1) { // Allocation spans multiple L2 pagetables
+    assert(!recursing);
+    for(unsigned i = virt_l1;i <= virt_end_l1;++i) {
+      unsigned cur_l2 = (virt%1048576)/4096;
+      unsigned pages_on_i = (cur_l2+count>256)?(256-cur_l2):count;
+      recursing = true;
+      map_pages(virt, phys, pages_on_i);
+      count -= pages_on_i;
+      virt += pages_on_i*4096;
+      phys += pages_on_i*4096;
+    }
+    recursing = false;
+    // Each chunk was mapped above; the code below handles a single L2 pagetable only
+    return;
+  }
+  
+  bool hw = (virt>=io_start);
+
+  // Check if L2 pagetable for area exists
+  if(ttable1_virt[virt_l1] == 0) {
+    // Allocate and build new L2 pt
+    if(ptmap_area_free == ptmap_area_top) {
+      // Expand L2 pagetable map area
+      assert(false && "NYI");
+    }
+    
+    // Allocate memory
+    uintptr_t newpt_p = phys_mm::alloc(1);
+    // Map to pt map area
+    uintptr_t newpt_v = 0xc0000000 + ptmap_area_free++*4096;
+    map_pages(newpt_v, newpt_p, 1);
+    
+    // Install new L2 page table
+    memset((void*)newpt_v, 0, 4096);
+    // There are 4 L2 page tables on a page (NOTE(review): assumes virt_l1 is a multiple of 4 - confirm)
+    for(int i = 0;i < 4;++i) {
+      ttable1_virt[virt_l1+i] = (uint32_t*)(newpt_v+1024*i);
+      ttable1[virt_l1+i] = (newpt_p+1024*i) | 0x1;
+    }
+    // Flush L1 pt entries from L1D$; a second clean is needed only if the 4 entries straddle a 64-byte line
+    __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1; dsb" : : [val] "r"(ttable1+virt_l1));
+    if(((uintptr_t)(ttable1+virt_l1)/64) != ((uintptr_t)(ttable1+virt_l1+3)/64))
+      __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1; dsb" : : [val] "r"(ttable1+virt_l1+3));
+  }
+
+  // Map in L2 pt
+  for(unsigned i = virt_l2;i <= virt_end_l2;++i) {
+    _map_page(virt_l1, i, phys, hw?0x036:0x576);
+    phys += 4096;
+  }
+  
+  // Flush L2 pt from L1D$ (16 four-byte entries per 64-byte line, so entry e lives in line e/16)
+  unsigned l2_cache_s = virt_l2/16, l2_cache_e = virt_end_l2/16;
+  for(unsigned i = l2_cache_s;i <= l2_cache_e;++i) {
+    uint32_t reg = ((uint32_t)ttable1_virt[virt_l1])+i*64;
+    __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" : : [val] "r"(reg));
+  }
+  __asm__ __volatile__ ("dsb");
+
+  // Invalidate TLBs
+  for(uintptr_t va = virt; va < virt+4096*count;va += 4096)
+    __asm__ __volatile__("mcr 15, 0, %[mva], c8, c7, 1" : : [mva] "r"(va));
+  
+  __asm__ __volatile__ ("isb");
+}
+
+// Remove the mappings of 'count' pages starting at page-aligned virtual
+// address 'virt'; every L2 pagetable covering the range must already exist
+void cortexa8::unmap_pages(uintptr_t virt, unsigned count) {
+  assert(_mmu_is_init);
+  // Check alignment
+  assert((virt&0xfff) == 0);
+  
+  unsigned virt_l1 = virt/1048576, virt_l2 = (virt%1048576)/4096;
+  unsigned virt_end_l1 = (virt+count*4096-1)/1048576, virt_end_l2 = ((virt+count*4096-1)%1048576)/4096;
+  
+  // Clear L2 pagetable entries (upper bound parenthesised: ?: binds weaker than <=)
+  for(unsigned l1 = virt_l1;l1 <= virt_end_l1;++l1) {
+    assert(ttable1_virt[l1] != nullptr);
+    for(unsigned l2 = (l1==virt_l1)?virt_l2:0;l2 <= ((l1==virt_end_l1)?virt_end_l2:255);++l2) {
+      ttable1_virt[l1][l2] = 0;
+    }
+  }
+  
+  // Flush L2 pagetables from L1D$ (16 four-byte entries per 64-byte line, so entry e lives in line e/16)
+  for(unsigned l1 = virt_l1;l1 <= virt_end_l1;++l1) {
+    for(unsigned cl = (l1==virt_l1)?virt_l2/16:0;
+	cl <= ((l1==virt_end_l1)?(virt_end_l2/16):15);
+	++cl) {
+      __asm__ __volatile__ ("mcr 15, 0, %[val], c7, c11, 1" : : [val] "r"(ttable1_virt[l1]+cl*16));
+    }
+  }
+  __asm__ __volatile__ ("dsb");
+
+  // Invalidate TLBs
+  for(uintptr_t va = virt; va < virt+4096*count;va += 4096)
+    __asm__ __volatile__("mcr 15, 0, %[mva], c8, c7, 1" : : [mva] "r"(va));
+  
+  __asm__ __volatile__ ("isb");
+}
+      
+
 extern uint32_t __stack_excp;
 extern uint32_t __stack_int;
 void cortexa8::init_handlers() noexcept {
diff --git a/cortexa8.hh b/cortexa8.hh
index a88acd7..598a3b7 100644
--- a/cortexa8.hh
+++ b/cortexa8.hh
@@ -22,6 +22,8 @@ namespace cortexa8 {
 
   void init_handlers() noexcept;
 
+  void map_pages(uintptr_t virt, uintptr_t phys, unsigned count);
+  void unmap_pages(uintptr_t virt, unsigned count);
 }
 
 // Implemented in cortexa8_asm.s
diff --git a/drv_omap35x_gpt.cc b/drv_omap35x_gpt.cc
index 0f6978b..d576d8b 100644
--- a/drv_omap35x_gpt.cc
+++ b/drv_omap35x_gpt.cc
@@ -3,9 +3,10 @@
 #include <cassert>
 #include <functional>
 
-#include "drv_omap35x_gpt.hh"
 #include "omap35x_intc.hh"
 #include "util.hh"
+#include "mmio.hh"
+#include "drv_omap35x_gpt.hh"
 
 #define TIOCP_CFG 4
 #define TISR 6
@@ -52,28 +53,28 @@ private:
     r_tisr() = 0x2;
   }
   
-  uint32_t volatile& r_tidr() {return _reg32(base_, 0x0); }
-  uint32_t volatile& r_tiocp_cfg() {return _reg32(base_, 0x10); }
-  uint32_t volatile& r_tistat() {return _reg32(base_, 0x14); }
-  uint32_t volatile& r_tisr() {return _reg32(base_, 0x18); }
-  uint32_t volatile& r_tier() {return _reg32(base_, 0x1c); }
-  uint32_t volatile& r_twer() {return _reg32(base_, 0x20); }
-  uint32_t volatile& r_tclr() {return _reg32(base_, 0x24); }
-  uint32_t volatile& r_tcrr() {return _reg32(base_, 0x28); }
-  uint32_t volatile& r_tldr() {return _reg32(base_, 0x2c); }
-  uint32_t volatile& r_ttgr() {return _reg32(base_, 0x30); }
-  uint32_t volatile& r_twps() {return _reg32(base_, 0x34); }
-  uint32_t volatile& r_tmar() {return _reg32(base_, 0x38); }
-  uint32_t volatile& r_tcar1() {return _reg32(base_, 0x3c); }
-  uint32_t volatile& r_tsicr() {return _reg32(base_, 0x40); }
-  uint32_t volatile& r_tcar2() {return _reg32(base_, 0x44); }
-  uint32_t volatile& r_tpir() {return _reg32(base_, 0x48); }
-  uint32_t volatile& r_tnir() {return _reg32(base_, 0x4c); }
-  uint32_t volatile& r_tcvr() {return _reg32(base_, 0x50); }
-  uint32_t volatile& r_tocr() {return _reg32(base_, 0x54); }
-  uint32_t volatile& r_towr() {return _reg32(base_, 0x58); }
+  uint32_t volatile& r_tidr() {return _reg32(base_.get_virt(), 0x0); }
+  uint32_t volatile& r_tiocp_cfg() {return _reg32(base_.get_virt(), 0x10); }
+  uint32_t volatile& r_tistat() {return _reg32(base_.get_virt(), 0x14); }
+  uint32_t volatile& r_tisr() {return _reg32(base_.get_virt(), 0x18); }
+  uint32_t volatile& r_tier() {return _reg32(base_.get_virt(), 0x1c); }
+  uint32_t volatile& r_twer() {return _reg32(base_.get_virt(), 0x20); }
+  uint32_t volatile& r_tclr() {return _reg32(base_.get_virt(), 0x24); }
+  uint32_t volatile& r_tcrr() {return _reg32(base_.get_virt(), 0x28); }
+  uint32_t volatile& r_tldr() {return _reg32(base_.get_virt(), 0x2c); }
+  uint32_t volatile& r_ttgr() {return _reg32(base_.get_virt(), 0x30); }
+  uint32_t volatile& r_twps() {return _reg32(base_.get_virt(), 0x34); }
+  uint32_t volatile& r_tmar() {return _reg32(base_.get_virt(), 0x38); }
+  uint32_t volatile& r_tcar1() {return _reg32(base_.get_virt(), 0x3c); }
+  uint32_t volatile& r_tsicr() {return _reg32(base_.get_virt(), 0x40); }
+  uint32_t volatile& r_tcar2() {return _reg32(base_.get_virt(), 0x44); }
+  uint32_t volatile& r_tpir() {return _reg32(base_.get_virt(), 0x48); }
+  uint32_t volatile& r_tnir() {return _reg32(base_.get_virt(), 0x4c); }
+  uint32_t volatile& r_tcvr() {return _reg32(base_.get_virt(), 0x50); }
+  uint32_t volatile& r_tocr() {return _reg32(base_.get_virt(), 0x54); }
+  uint32_t volatile& r_towr() {return _reg32(base_.get_virt(), 0x58); }
 
-  uintptr_t base_;
+  MMIO_alloc base_;
   int irq_;
   int_handler_t handler_;
 };
diff --git a/exceptions.hh b/exceptions.hh
new file mode 100644
index 0000000..923b027
--- /dev/null
+++ b/exceptions.hh
@@ -0,0 +1,8 @@
+#ifndef _EXCEPTIONS_HH_
+#define _EXCEPTIONS_HH_
+
+namespace ex {
+  class bad_alloc{}; // empty tag type; mm::bad_alloc derives from it
+}
+
+#endif
diff --git a/fw_cxx.ld b/fw_cxx.ld
index b9fbb46..1f85796 100644
--- a/fw_cxx.ld
+++ b/fw_cxx.ld
@@ -182,27 +182,50 @@ SECTIONS
    . = ALIGN(. != 0 ? 32 / 8 : 1);
   }
   _bss_end__ = . ; __bss_end__ = . ;
-  . = ALIGN(32 / 8);
-  . = ALIGN(32 / 8);
+
+  . = ALIGN(64);
+  . = . + 0x4000; /* 16KiB exception stack */
+  __stack_excp = .;
+  . = . + 0x4000; /* 16KiB interrupt stack */
+  __stack_int = .;
+  . = . + 0x4000; /* 16KiB kernel startup stack */
+  __stack = .;
+
   __end__ = . ;
   _end = .; PROVIDE (end = .);
 
-  . = ALIGN(64);
-  .stack          :
-  {
-    . = . + 0x200000; /* 2MiB stack should be enough... */
-    __stack = .;
+  /* L1 page table must be aligned at 16KiB boundary */
+  . = ALIGN(16384);
+  _kernel_ptl1 = .;
+  . = . + 0x4000; /* Allocate 16KiB for L1 PT */
+  _kernel_ptl2 = .;
+  /* Calculate the number of L2 PTs to cover the kernel image + bss */
+  _kernel_numl2pt = ((_end - 0x80000000) / 1048576) + (((_end - 0x80000000) % 1048576)==0?0:1);
+  /* Allocate 1KiB for each L2 PT */
+  . = . + (0x400 * _kernel_numl2pt);
+  _kernel_pt_end = .;
+  /* If the page tables cross a L1 PT entry boundary (1MiB), signal to the 
+     initialization code that an additional L1 entry and L2 page table must
+     be created for the memory containing the page tables */
+  _kernel_additional_pt = ((_kernel_pt_end & 0xfff00000) == (_kernel_ptl1 & 0xfff00000))?0:1;
+  _kernel_real_numl2pt = _kernel_numl2pt + _kernel_additional_pt;
+  . = . + (0x400 * _kernel_additional_pt);
+  _kernel_real_pt_end = .;
+  _kernel_numl2ent = ((_kernel_real_pt_end - 0x80000000) / 4096) + (((_kernel_real_pt_end - 0x80000000) % 4096)==0?0:1);
 
-    *(.stack)
-  }
-  . = ALIGN(54);
-  . = . + 0x10000; /* 64KiB exception stack */
-  __stack_excp = .;
-  . = . + 0x10000; /* 64KiB interrupt stack */
-  __stack_int = .;
   __heap_start = .; /* for _sbrk */
-  /* Rest until end of RAM is heap */
-  __heap_end = 0x90000000;
+  /* Virtual address space from end of kernel to 0xbfffffff is heap */
+  __heap_end = 0xc0000000;
+
+  /* Virtual address space for kernel scratchpad */
+  __scratch_start = 0xc0000000;
+  /* Map: 
+     0xc0000000 - 0xc0800000	Kernel L2 pagetables
+  */
+  __scratch_end = 0xe0000000;
+  /* Virtual address space for MMIO */
+  __io_start = 0xe0000000;
+  __io_end = 0xfffff000;
 
   /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
 }
diff --git a/main.cc b/main.cc
index ae4b7c9..351f78c 100644
--- a/main.cc
+++ b/main.cc
@@ -8,42 +8,63 @@
 #include "omap35x.hh"
 #include "omap35x_intc.hh"
 #include "omap35x_prcm.hh"
+#include "phys_mm.hh"
+#include "mm.hh"
 #include "cortexa8.hh"
 #include "uart.hh"
 
-static volatile uint32_t *const prcm_wkst_per = (uint32_t*)0x483070b0;
-
 static volatile uint32_t tickctr = 0;
+static OMAP35x_prcm* _prcm = nullptr;
 void tickfunc() noexcept {
   ++tickctr;
-  *prcm_wkst_per = (1<<3); // Clear GPT2 wake bit
+  
+  _prcm->clear_wake_per(3);
 }
 
 void setConsole(ICharacterDevice* newConsole);
 
 int main(int argc, char* argv[]) {
-  // Enable caches
-  cortexa8::enable_icache();
+  // Initialize memory
   cortexa8::enable_dcache();
+  cortexa8::enable_icache();
+  cortexa8::init_mmu();
+
+  // Enable early console
+  EarlyUART earlyUART{};
+  setConsole(&earlyUART);
+
+  // Install handlers
+  cortexa8::init_handlers();
+
+  // Initialize physical memory management
+  phys_mm::init();  
+  
+  // Initialize kernel dynamic memory management
+  mm::init();
+  // From here on, malloc/free and new/delete may be used
+
+  phys_mm::print_state();
 
   // Configure PRCM
   OMAP35x_prcm prcm {0x48004000, 0x48306000};
+  _prcm = &prcm;
+
+  //while(1) {__asm__ __volatile__ ("wfi"); }
 
   // Configure interrrupt & exception handling
-  cortexa8::init_handlers();
   OMAP35x_intc intc {0x48200000};
 
-  cortexa8::init_mmu();
-
   prcm.enable_peripherals();
 
-  UART consoleUART {0x49020000, 74};
+  UART consoleUART {0x49020000, 74, prcm};
   setConsole(&consoleUART);
 
   // Enable interrupts
   cortexa8_ena_int();
+  
+  OMAP35x_Info chipInfo{0x48002000, 0x4830a000};
 
-  omap35x_print_chip_id();
+  chipInfo.print_chip_id();
 
   printf("5\n");
 
@@ -63,6 +84,9 @@ int main(int argc, char* argv[]) {
       break;
   }
 
+  malloc_stats();
+  phys_mm::print_state();
+
   while(1) {
     __asm__ __volatile__ ("wfi");
     if(tickctr%100 == 0) {
@@ -70,9 +94,7 @@ int main(int argc, char* argv[]) {
       fflush(stdout);
     }
   }
-    
-
-  malloc_stats();
+  
   return 0;
 }
 
diff --git a/mm.cc b/mm.cc
new file mode 100644
index 0000000..da44db8
--- /dev/null
+++ b/mm.cc
@@ -0,0 +1,60 @@
+#include <cstdint>
+#include <cassert>
+
+#include "cortexa8.hh"
+#include "phys_mm.hh"
+#include "util.hh"
+#include "mm.hh"
+
+extern uint32_t __scratch_start, __scratch_end, __io_start, __io_end, __heap_start, __heap_end;
+
+static const uintptr_t scratch_start = (uintptr_t)&__scratch_start;
+static const uintptr_t scratch_end = (uintptr_t)&__scratch_end;
+static const uintptr_t io_start = (uintptr_t)&__io_start;
+static const uintptr_t io_end = (uintptr_t)&__io_end;
+static const uintptr_t heap_start = (uintptr_t)&__heap_start;
+static const uintptr_t heap_end = (uintptr_t)&__heap_end;
+
+static uintptr_t heap_top, io_top;
+
+void mm::init() {
+  heap_top = phys_mm::get_end_of_kernel_alloc();
+  
+  /* Map unused area of kernel image RAM to heap
+     The space between the end of the kernel and the end of
+     the power-of-2 block allocated for it by the physical allocator
+     would otherwise be lost */
+  uintptr_t heap_start_align = ((heap_start&0xfff)==0)?heap_start:((heap_start&~0xfff)+4096); // round heap_start up to a 4KiB page
+  cortexa8::map_pages(heap_start_align, heap_start_align, (heap_top-heap_start)/4096); // identity mapping: virt == phys
+
+  io_top = io_start; // no I/O virtual address space has been handed out yet
+}
+
+uintptr_t mm::virtalloc_io(unsigned pages) { // reserves address space only; nothing is mapped here
+  if(io_top+0x1000*pages >= io_end) // request would reach the end of the I/O region
+    throw bad_alloc{};
+  
+  uintptr_t ret = io_top; // start of the newly reserved range
+  io_top += 0x1000*pages; // io_top only ever grows in this file
+  
+  return ret;
+}
+
+uintptr_t mm::grow_heap(unsigned pages) {
+  // Allocations are done in powers of 2, so round up pages to next power of 2
+  pages = _pow2(_ln2(pages)); // NOTE(review): presumably _ln2 rounds up - confirm in util.hh
+
+  if(heap_top+0x1000*pages >= heap_end) // heap would reach the end of its virtual range
+    throw bad_alloc{};
+  
+  uintptr_t newphys = phys_mm::alloc(pages);
+
+  cortexa8::map_pages(heap_top, newphys, pages); // new memory is mapped directly above the current heap end
+
+  heap_top += 0x1000*pages;
+  return heap_top; // the new heap end
+}
+
+uintptr_t mm::get_heap_end() { // current heap end as tracked by init() and grow_heap()
+  return heap_top;
+}
diff --git a/mm.hh b/mm.hh
new file mode 100644
index 0000000..ea372e2
--- /dev/null
+++ b/mm.hh
@@ -0,0 +1,26 @@
+#ifndef _MM_HH_
+#define _MM_HH_
+
+#include <cstdint>
+
+#include "exceptions.hh"
+
+namespace mm {
+  // Initialize memory management
+  // Physical memory management must be initialized before calling mm::init
+  void init();
+
+  // Allocate 'pages' pages of virtual address space in the I/O region
+  uintptr_t virtalloc_io(unsigned pages); // throws mm::bad_alloc when the I/O region is exhausted
+
+  // Grow the kernel heap by 'pages' pages
+  // Allocate and map the desired amount of memory
+  // Return the new heap end
+  uintptr_t grow_heap(unsigned pages); // throws mm::bad_alloc when the heap would reach its limit
+  
+  uintptr_t get_heap_end(); // current end of the kernel heap
+
+  class bad_alloc : public ex::bad_alloc {}; // thrown by virtalloc_io and grow_heap
+}
+
+#endif
diff --git a/mmio.hh b/mmio.hh
new file mode 100644
index 0000000..1680362
--- /dev/null
+++ b/mmio.hh
@@ -0,0 +1,32 @@
+#ifndef _MMIO_HH_
+#define _MMIO_HH_
+
+#include <cstdint>
+
+#include "mm.hh"
+#include "cortexa8.hh"
+
+class MMIO_alloc {
+public:
+  // Reserve 'size' pages of I/O virtual address space and map them onto page-aligned physical 'base_p'
+  MMIO_alloc(uintptr_t base_p, unsigned size = 1) : base_p_{base_p}, base_v_{mm::virtalloc_io(size)}, size_{size} {
+    cortexa8::map_pages(base_v_, base_p_, size_);
+  }
+
+  // Remove the mapping again (the virtual range itself is not given back)
+  ~MMIO_alloc() {
+    cortexa8::unmap_pages(base_v_, size_);
+  }
+  // The destructor unmaps, so a copy would unmap the same pages twice
+  MMIO_alloc(MMIO_alloc const&) = delete;
+  MMIO_alloc& operator=(MMIO_alloc const&) = delete;
+
+  uintptr_t const& get_virt() const noexcept { return base_v_; }
+  uintptr_t const& get_phys() const noexcept { return base_p_; }
+
+private:
+  uintptr_t base_p_, base_v_;
+  unsigned size_;
+};
+
+#endif
diff --git a/omap35x.cc b/omap35x.cc
index 5105183..7056c09 100644
--- a/omap35x.cc
+++ b/omap35x.cc
@@ -1,53 +1,83 @@
 #include <cstdint>
 #include <cstdio>
+#include <cassert>
 
+#include "util.hh"
+#include "mmio.hh"
 #include "omap35x.hh"
 
 using std::printf;
 
-static volatile uint32_t *const omap35x_omap_sr = (uint32_t*)0x4800244c; // 1 word
-static volatile uint32_t *const omap35x_idcode = (uint32_t*)0x4830a204; // 1 dword
-static volatile uint32_t *const omap35x_die_id = (uint32_t*)0x4830a218; // 4 dwords
-static volatile uint32_t *const omap35x_skuid = (uint32_t*)0x4830a20c; // 1 dword
+static const char *const omap_names[5] = {"OMAP3530", "OMAP3525", "OMAP3515", "OMAP3503", "UNKNOWN"};
+static const char *const omap_ver[8] = {"ES 1.0", "ES 2.0", "ES 2.1", "ES 3.0", "ES 3.1", "UNKNOWN", "UNKNOWN", "ES 3.1.2"};
 
-static const char *const omap_names[] = {"OMAP3530", "OMAP3525", "OMAP3515", "OMAP3503", "UNKNOWN"};
-static const char *const omap_ver[] = {"ES 1.0", "ES 2.0", "ES 2.1", "ES 3.0", "ES 3.1", "UNKNOWN", "UNKNOWN", "ES 3.1.2"};
-
-void omap35x_print_chip_id() {
-  uint16_t omapsr = *omap35x_omap_sr&0xffffu;
-  int omapsr_idx;
-  switch(omapsr) {
-  case 0x0c00:
-    omapsr_idx = 0;
-    break;
-  case 0x4c00:
-    omapsr_idx = 1;
-    break;
-  case 0x1c00:
-    omapsr_idx = 2;
-    break;
-  case 0x5c00:
-    omapsr_idx = 3;
-    break;
-  default:
-    printf("Warning: Unknown OMAP35x type (%.8lx)\n", *omap35x_omap_sr);
-    omapsr_idx = 4;
-    break;
+class OMAP35x_Info_impl {
+public:
+  OMAP35x_Info_impl(uintptr_t scm_base, uintptr_t control_base) : scm_base_{scm_base}, control_base_{control_base} {
   }
 
-  uint32_t idcode = *omap35x_idcode;
-  int id_idx = (idcode&0xf0000000u)>>28;
-  if(id_idx > 7) // Versions 8..15 are unknown
-    id_idx = 6;
-  if(id_idx == 5 || id_idx == 6)
-    printf("Warning: Unknown OMAP35x version (%.8lx)\n", idcode);
+  ~OMAP35x_Info_impl() {
+  }
 
-  bool highfreq = false;
-  if((*omap35x_skuid&0xf) == 0x8)
-    highfreq = true;
+  void print_chip_id() {
+    uint16_t omapsr = r_omap_sr()&0xffffu;
+    int omapsr_idx;
+    switch(omapsr) {
+    case 0x0c00:
+      omapsr_idx = 0;
+      break;
+    case 0x4c00:
+      omapsr_idx = 1;
+      break;
+    case 0x1c00:
+      omapsr_idx = 2;
+      break;
+    case 0x5c00:
+      omapsr_idx = 3;
+      break;
+    default:
+      printf("Warning: Unknown OMAP35x type (%.8lx)\n", r_omap_sr());
+      omapsr_idx = 4;
+      break;
+    }
 
-  printf("%s %s %s Serial# %.8lx%.8lx%.8lx%.8lx\n",
-  	 omap_names[omapsr_idx], omap_ver[id_idx],
-  	 highfreq?"720 MHz":"600 MHz",
-  	 omap35x_die_id[3], omap35x_die_id[2], omap35x_die_id[1], omap35x_die_id[0]);
+    uint32_t idcode = r_idcode();
+    int id_idx = (idcode&0xf0000000u)>>28;
+    if(id_idx > 7) // Versions 8..15 are unknown
+      id_idx = 6;
+    if(id_idx == 5 || id_idx == 6)
+      printf("Warning: Unknown OMAP35x version (%.8lx)\n", idcode);
+
+    bool highfreq = false;
+    if((r_skuid()&0xf) == 0x8)
+      highfreq = true;
+
+    printf("%s %s %s Serial# %.8lx%.8lx%.8lx%.8lx\n",
+	   omap_names[omapsr_idx], omap_ver[id_idx],
+	   highfreq?"720 MHz":"600 MHz",
+	   r_die_id(3), r_die_id(2), r_die_id(1), r_die_id(0));
+  }
+
+  std::array<uint32_t, 4> get_serial() {
+    return std::array<uint32_t, 4>{r_die_id(3), r_die_id(2), r_die_id(1), r_die_id(0)};
+  }
+  
+private:
+  MMIO_alloc scm_base_, control_base_;
+  
+  uint32_t volatile& r_omap_sr() { return _reg32(scm_base_.get_virt(), 0x44c); }
+  uint32_t volatile& r_idcode() { return _reg32(control_base_.get_virt(), 0x204); }
+  uint32_t volatile& r_die_id(int n) { assert(n >= 0 && n <= 3); return _reg32(control_base_.get_virt(), 0x218+0x4*n); }
+  uint32_t volatile& r_skuid() { return _reg32(control_base_.get_virt(), 0x20c); }
+};
+
+
+OMAP35x_Info::OMAP35x_Info(uintptr_t scm_base, uintptr_t control_base) : impl_{new OMAP35x_Info_impl{scm_base, control_base}} {
+}
+
+OMAP35x_Info::~OMAP35x_Info() {
+}
+
+void OMAP35x_Info::print_chip_id() {
+  impl_->print_chip_id();
 }
diff --git a/omap35x.hh b/omap35x.hh
index 8b13fd3..d30ccee 100644
--- a/omap35x.hh
+++ b/omap35x.hh
@@ -1,6 +1,19 @@
 #ifndef _OMAP35X_HH_
 #define _OMAP35X_HH_
 
-void omap35x_print_chip_id();
+#include <memory>
+
+class OMAP35x_Info_impl;
+
+class OMAP35x_Info { // thin front end; all work is done by OMAP35x_Info_impl in omap35x.cc
+public:
+  OMAP35x_Info(uintptr_t scm_base, uintptr_t control_base); // physical base addresses of the two register blocks
+  ~OMAP35x_Info(); // defined out of line, where OMAP35x_Info_impl is a complete type
+
+  void print_chip_id(); // forwards to the implementation's print_chip_id()
+
+private:
+  std::unique_ptr<OMAP35x_Info_impl> impl_;
+};
 
 #endif
diff --git a/omap35x_intc.cc b/omap35x_intc.cc
index b3f80db..523ddd4 100644
--- a/omap35x_intc.cc
+++ b/omap35x_intc.cc
@@ -3,9 +3,10 @@
 #include <cassert>
 #include <array>
 
-#include "omap35x_intc.hh"
 #include "cortexa8.hh"
+#include "mmio.hh"
 #include "util.hh"
+#include "omap35x_intc.hh"
 
 
 extern "C" void _omap35x_intc_handler() __attribute__((interrupt ("IRQ")));
@@ -72,27 +73,27 @@ private:
     __asm__ __volatile__ ("dsb");
   } 
 
-  uint32_t volatile& r_sysconfig() {return _reg32(base_, 0x10); }
-  uint32_t volatile& r_sysstatus() {return _reg32(base_, 0x14); }
-  uint32_t volatile& r_sir_irq() {return _reg32(base_, 0x40); }
-  uint32_t volatile& r_sir_fiq() {return _reg32(base_, 0x44); }
-  uint32_t volatile& r_control() {return _reg32(base_, 0x48); }
-  uint32_t volatile& r_protection() {return _reg32(base_, 0x4c); }
-  uint32_t volatile& r_idle() {return _reg32(base_, 0x50); }
-  uint32_t volatile& r_irq_priority() {return _reg32(base_, 0x60); }
-  uint32_t volatile& r_fiq_priority() {return _reg32(base_, 0x64); }
-  uint32_t volatile& r_threshold() {return _reg32(base_, 0x68); }
-  uint32_t volatile& r_itr(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x80+0x20*n); }
-  uint32_t volatile& r_mir(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x84+0x20*n); }
-  uint32_t volatile& r_mir_clear(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x88+0x20*n); }
-  uint32_t volatile& r_mir_set(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x8c+0x20*n); }
-  uint32_t volatile& r_isr_set(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x90+0x20*n); }
-  uint32_t volatile& r_isr_clear(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x94+0x20*n); }
-  uint32_t volatile& r_pending_irq(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x98+0x20*n); }
-  uint32_t volatile& r_pending_fiq(int n) {assert(n >= 0 && n <= 2); return _reg32(base_, 0x9c+0x20*n); }
-  uint32_t volatile& r_ilr(int m) {assert(m >= 0 && m <= 95); return _reg32(base_, 0x100+0x4*m); }
+  uint32_t volatile& r_sysconfig() {return _reg32(base_.get_virt(), 0x10); }
+  uint32_t volatile& r_sysstatus() {return _reg32(base_.get_virt(), 0x14); }
+  uint32_t volatile& r_sir_irq() {return _reg32(base_.get_virt(), 0x40); }
+  uint32_t volatile& r_sir_fiq() {return _reg32(base_.get_virt(), 0x44); }
+  uint32_t volatile& r_control() {return _reg32(base_.get_virt(), 0x48); }
+  uint32_t volatile& r_protection() {return _reg32(base_.get_virt(), 0x4c); }
+  uint32_t volatile& r_idle() {return _reg32(base_.get_virt(), 0x50); }
+  uint32_t volatile& r_irq_priority() {return _reg32(base_.get_virt(), 0x60); }
+  uint32_t volatile& r_fiq_priority() {return _reg32(base_.get_virt(), 0x64); }
+  uint32_t volatile& r_threshold() {return _reg32(base_.get_virt(), 0x68); }
+  uint32_t volatile& r_itr(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x80+0x20*n); }
+  uint32_t volatile& r_mir(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x84+0x20*n); }
+  uint32_t volatile& r_mir_clear(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x88+0x20*n); }
+  uint32_t volatile& r_mir_set(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x8c+0x20*n); }
+  uint32_t volatile& r_isr_set(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x90+0x20*n); }
+  uint32_t volatile& r_isr_clear(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x94+0x20*n); }
+  uint32_t volatile& r_pending_irq(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x98+0x20*n); }
+  uint32_t volatile& r_pending_fiq(int n) {assert(n >= 0 && n <= 2); return _reg32(base_.get_virt(), 0x9c+0x20*n); }
+  uint32_t volatile& r_ilr(int m) {assert(m >= 0 && m <= 95); return _reg32(base_.get_virt(), 0x100+0x4*m); }
 
-  uintptr_t base_;
+  MMIO_alloc base_;
   std::array<int_handler_t, 96> handler_tbl_;
 };
 
diff --git a/omap35x_prcm.cc b/omap35x_prcm.cc
index 7ac5e78..6e3f7dd 100644
--- a/omap35x_prcm.cc
+++ b/omap35x_prcm.cc
@@ -1,13 +1,14 @@
 #include <stdint.h>
 #include <stdio.h>
-
-#include "omap35x_prcm.hh"
+#include <cassert>
 
 #include "util.hh"
+#include "mmio.hh"
+#include "omap35x_prcm.hh"
 
 class OMAP35x_prcm_impl {
 public:
-  OMAP35x_prcm_impl(uintptr_t cm_base, uintptr_t pm_base) :cm_base_(cm_base), pm_base_(pm_base) {
+  OMAP35x_prcm_impl(uintptr_t cm_base, uintptr_t pm_base) :cm_base_{cm_base, 28}, pm_base_{pm_base, 42} {
     // Setup IVA2 domain (unused, disable)
     r_clkstctrl_iva2() = 0x3;
     r_wkdep_iva2() = 0;
@@ -64,188 +65,194 @@ public:
     r_wken_per() |= (1<<11); // UART3 wake up enable
   }
 
+  void clear_wake_per(int n) { // writes a word with only bit n (0..31) set to r_wkst_per(); presumably write-1-to-clear - confirm in the TRM
+    assert(n >= 0 && n <= 31);
+    r_wkst_per() = (1u<<n); // unsigned operand: 1<<31 would shift into the sign bit of a signed int
+  }
+
 private:
-  uintptr_t cm_base_, pm_base_;
+  MMIO_alloc cm_base_, pm_base_;
+  //uintptr_t cm_base_, pm_base_;
 
   // CM registers
-  uint32_t volatile& r_fclken_iva2() { return _reg32(cm_base_, 0x0); }
-  uint32_t volatile& r_clken_pll_iva2() { return _reg32(cm_base_, 0x4); }
-  uint32_t volatile& r_idlest_iva2() { return _reg32(cm_base_, 0x20); }
-  uint32_t volatile& r_idlest_pll_iva2() { return _reg32(cm_base_, 0x24); }
-  uint32_t volatile& r_autoidle_pll_iva2() { return _reg32(cm_base_, 0x34); }
-  uint32_t volatile& r_clksel1_pll_iva2() { return _reg32(cm_base_, 0x40); }
-  uint32_t volatile& r_clksel2_pll_iva2() { return _reg32(cm_base_, 0x44); }
-  uint32_t volatile& r_clkstctrl_iva2() { return _reg32(cm_base_, 0x48); }
-  uint32_t volatile& r_clkstst_iva2() { return _reg32(cm_base_, 0x4c); }
+  uint32_t volatile& r_fclken_iva2() { return _reg32(cm_base_.get_virt(), 0x0); }
+  uint32_t volatile& r_clken_pll_iva2() { return _reg32(cm_base_.get_virt(), 0x4); }
+  uint32_t volatile& r_idlest_iva2() { return _reg32(cm_base_.get_virt(), 0x20); }
+  uint32_t volatile& r_idlest_pll_iva2() { return _reg32(cm_base_.get_virt(), 0x24); }
+  uint32_t volatile& r_autoidle_pll_iva2() { return _reg32(cm_base_.get_virt(), 0x34); }
+  uint32_t volatile& r_clksel1_pll_iva2() { return _reg32(cm_base_.get_virt(), 0x40); }
+  uint32_t volatile& r_clksel2_pll_iva2() { return _reg32(cm_base_.get_virt(), 0x44); }
+  uint32_t volatile& r_clkstctrl_iva2() { return _reg32(cm_base_.get_virt(), 0x48); }
+  uint32_t volatile& r_clkstst_iva2() { return _reg32(cm_base_.get_virt(), 0x4c); }
 
-  uint32_t volatile& r_clken_pll_mpu() { return _reg32(cm_base_, 0x904); }
-  uint32_t volatile& r_idlest_mpu() { return _reg32(cm_base_, 0x920); }
-  uint32_t volatile& r_idlest_pll_mpu() { return _reg32(cm_base_, 0x924); }
-  uint32_t volatile& r_autoidle_pll_mpu() { return _reg32(cm_base_, 0x934); }
-  uint32_t volatile& r_clksel1_pll_mpu() { return _reg32(cm_base_, 0x940); }
-  uint32_t volatile& r_clksel2_pll_mpu() { return _reg32(cm_base_, 0x944); }
-  uint32_t volatile& r_clkstctrl_mpu() { return _reg32(cm_base_, 0x948); }
-  uint32_t volatile& r_clkstst_mpu() { return _reg32(cm_base_, 0x94c); }
+  uint32_t volatile& r_clken_pll_mpu() { return _reg32(cm_base_.get_virt(), 0x904); }
+  uint32_t volatile& r_idlest_mpu() { return _reg32(cm_base_.get_virt(), 0x920); }
+  uint32_t volatile& r_idlest_pll_mpu() { return _reg32(cm_base_.get_virt(), 0x924); }
+  uint32_t volatile& r_autoidle_pll_mpu() { return _reg32(cm_base_.get_virt(), 0x934); }
+  uint32_t volatile& r_clksel1_pll_mpu() { return _reg32(cm_base_.get_virt(), 0x940); }
+  uint32_t volatile& r_clksel2_pll_mpu() { return _reg32(cm_base_.get_virt(), 0x944); }
+  uint32_t volatile& r_clkstctrl_mpu() { return _reg32(cm_base_.get_virt(), 0x948); }
+  uint32_t volatile& r_clkstst_mpu() { return _reg32(cm_base_.get_virt(), 0x94c); }
 
-  uint32_t volatile& r_fclken1_core() { return _reg32(cm_base_, 0xa00); }
-  uint32_t volatile& r_fclken3_core() { return _reg32(cm_base_, 0xa08); }
-  uint32_t volatile& r_iclken1_core() { return _reg32(cm_base_, 0xa10); }
-  uint32_t volatile& r_iclken3_core() { return _reg32(cm_base_, 0xa18); }
-  uint32_t volatile& r_idlest1_core() { return _reg32(cm_base_, 0xa20); }
-  uint32_t volatile& r_idlest3_core() { return _reg32(cm_base_, 0xa28); }
-  uint32_t volatile& r_autoidle1_core() { return _reg32(cm_base_, 0xa30); }
-  uint32_t volatile& r_autoidle3_core() { return _reg32(cm_base_, 0xa38); }
-  uint32_t volatile& r_clksel_core() { return _reg32(cm_base_, 0xa40); }
-  uint32_t volatile& r_clkstctrl_core() { return _reg32(cm_base_, 0xa48); }
-  uint32_t volatile& r_clkstst_core() { return _reg32(cm_base_, 0xa4c); }
+  uint32_t volatile& r_fclken1_core() { return _reg32(cm_base_.get_virt(), 0xa00); }
+  uint32_t volatile& r_fclken3_core() { return _reg32(cm_base_.get_virt(), 0xa08); }
+  uint32_t volatile& r_iclken1_core() { return _reg32(cm_base_.get_virt(), 0xa10); }
+  uint32_t volatile& r_iclken3_core() { return _reg32(cm_base_.get_virt(), 0xa18); }
+  uint32_t volatile& r_idlest1_core() { return _reg32(cm_base_.get_virt(), 0xa20); }
+  uint32_t volatile& r_idlest3_core() { return _reg32(cm_base_.get_virt(), 0xa28); }
+  uint32_t volatile& r_autoidle1_core() { return _reg32(cm_base_.get_virt(), 0xa30); }
+  uint32_t volatile& r_autoidle3_core() { return _reg32(cm_base_.get_virt(), 0xa38); }
+  uint32_t volatile& r_clksel_core() { return _reg32(cm_base_.get_virt(), 0xa40); }
+  uint32_t volatile& r_clkstctrl_core() { return _reg32(cm_base_.get_virt(), 0xa48); }
+  uint32_t volatile& r_clkstst_core() { return _reg32(cm_base_.get_virt(), 0xa4c); }
 
-  uint32_t volatile& r_fclken_sgx() { return _reg32(cm_base_, 0xb00); }
-  uint32_t volatile& r_iclken_sgx() { return _reg32(cm_base_, 0xb10); }
-  uint32_t volatile& r_idlest_sgx() { return _reg32(cm_base_, 0xb20); }
-  uint32_t volatile& r_clksel_sgx() { return _reg32(cm_base_, 0xb40); }
-  uint32_t volatile& r_sleepdep_sgx() { return _reg32(cm_base_, 0xb44); }
-  uint32_t volatile& r_clkstctrl_sgx() { return _reg32(cm_base_, 0xb48); }
-  uint32_t volatile& r_clkstst_sgx() { return _reg32(cm_base_, 0xb4c); }
+  uint32_t volatile& r_fclken_sgx() { return _reg32(cm_base_.get_virt(), 0xb00); }
+  uint32_t volatile& r_iclken_sgx() { return _reg32(cm_base_.get_virt(), 0xb10); }
+  uint32_t volatile& r_idlest_sgx() { return _reg32(cm_base_.get_virt(), 0xb20); }
+  uint32_t volatile& r_clksel_sgx() { return _reg32(cm_base_.get_virt(), 0xb40); }
+  uint32_t volatile& r_sleepdep_sgx() { return _reg32(cm_base_.get_virt(), 0xb44); }
+  uint32_t volatile& r_clkstctrl_sgx() { return _reg32(cm_base_.get_virt(), 0xb48); }
+  uint32_t volatile& r_clkstst_sgx() { return _reg32(cm_base_.get_virt(), 0xb4c); }
 
-  uint32_t volatile& r_fclken_wkup() { return _reg32(cm_base_, 0xc00); }
-  uint32_t volatile& r_iclken_wkup() { return _reg32(cm_base_, 0xc10); }
-  uint32_t volatile& r_idlest_wkup() { return _reg32(cm_base_, 0xc20); }
-  uint32_t volatile& r_autoidle_wkup() { return _reg32(cm_base_, 0xc30); }
-  uint32_t volatile& r_clksel_wkup() { return _reg32(cm_base_, 0xc40); }
+  uint32_t volatile& r_fclken_wkup() { return _reg32(cm_base_.get_virt(), 0xc00); }
+  uint32_t volatile& r_iclken_wkup() { return _reg32(cm_base_.get_virt(), 0xc10); }
+  uint32_t volatile& r_idlest_wkup() { return _reg32(cm_base_.get_virt(), 0xc20); }
+  uint32_t volatile& r_autoidle_wkup() { return _reg32(cm_base_.get_virt(), 0xc30); }
+  uint32_t volatile& r_clksel_wkup() { return _reg32(cm_base_.get_virt(), 0xc40); }
 
-  uint32_t volatile& r_clken1_pll() { return _reg32(cm_base_, 0xd00); }
-  uint32_t volatile& r_clken2_pll() { return _reg32(cm_base_, 0xd04); }
-  uint32_t volatile& r_idlest1_pll() { return _reg32(cm_base_, 0xd20); }
-  uint32_t volatile& r_idlest2_pll() { return _reg32(cm_base_, 0xd24); }
-  uint32_t volatile& r_autoidle_pll() { return _reg32(cm_base_, 0xd30); }
-  uint32_t volatile& r_autoidle2_pll() { return _reg32(cm_base_, 0xd34); }
-  uint32_t volatile& r_clksel1_pll() { return _reg32(cm_base_, 0xd40); }
-  uint32_t volatile& r_clksel2_pll() { return _reg32(cm_base_, 0xd44); }
-  uint32_t volatile& r_clksel3_pll() { return _reg32(cm_base_, 0xd48); }
-  uint32_t volatile& r_clksel4_pll() { return _reg32(cm_base_, 0xd4c); }
-  uint32_t volatile& r_clksel5_pll() { return _reg32(cm_base_, 0xd50); }
-  uint32_t volatile& r_clkout_ctrl() { return _reg32(cm_base_, 0xd70); }
+  uint32_t volatile& r_clken1_pll() { return _reg32(cm_base_.get_virt(), 0xd00); }
+  uint32_t volatile& r_clken2_pll() { return _reg32(cm_base_.get_virt(), 0xd04); }
+  uint32_t volatile& r_idlest1_pll() { return _reg32(cm_base_.get_virt(), 0xd20); }
+  uint32_t volatile& r_idlest2_pll() { return _reg32(cm_base_.get_virt(), 0xd24); }
+  uint32_t volatile& r_autoidle_pll() { return _reg32(cm_base_.get_virt(), 0xd30); }
+  uint32_t volatile& r_autoidle2_pll() { return _reg32(cm_base_.get_virt(), 0xd34); }
+  uint32_t volatile& r_clksel1_pll() { return _reg32(cm_base_.get_virt(), 0xd40); }
+  uint32_t volatile& r_clksel2_pll() { return _reg32(cm_base_.get_virt(), 0xd44); }
+  uint32_t volatile& r_clksel3_pll() { return _reg32(cm_base_.get_virt(), 0xd48); }
+  uint32_t volatile& r_clksel4_pll() { return _reg32(cm_base_.get_virt(), 0xd4c); }
+  uint32_t volatile& r_clksel5_pll() { return _reg32(cm_base_.get_virt(), 0xd50); }
+  uint32_t volatile& r_clkout_ctrl() { return _reg32(cm_base_.get_virt(), 0xd70); }
 
-  uint32_t volatile& r_fclken_dss() { return _reg32(cm_base_, 0xe00); }
-  uint32_t volatile& r_iclken_dss() { return _reg32(cm_base_, 0xe10); }
-  uint32_t volatile& r_idlest_dss() { return _reg32(cm_base_, 0xe20); }
-  uint32_t volatile& r_autoidle_dss() { return _reg32(cm_base_, 0xe30); }
-  uint32_t volatile& r_clksel_dss() { return _reg32(cm_base_, 0xe40); }
-  uint32_t volatile& r_sleepdep_dss() { return _reg32(cm_base_, 0xe44); }
-  uint32_t volatile& r_clkstctrl_dss() { return _reg32(cm_base_, 0xe48); }
-  uint32_t volatile& r_clkstst_dss() { return _reg32(cm_base_, 0xe4c); }
+  uint32_t volatile& r_fclken_dss() { return _reg32(cm_base_.get_virt(), 0xe00); }
+  uint32_t volatile& r_iclken_dss() { return _reg32(cm_base_.get_virt(), 0xe10); }
+  uint32_t volatile& r_idlest_dss() { return _reg32(cm_base_.get_virt(), 0xe20); }
+  uint32_t volatile& r_autoidle_dss() { return _reg32(cm_base_.get_virt(), 0xe30); }
+  uint32_t volatile& r_clksel_dss() { return _reg32(cm_base_.get_virt(), 0xe40); }
+  uint32_t volatile& r_sleepdep_dss() { return _reg32(cm_base_.get_virt(), 0xe44); }
+  uint32_t volatile& r_clkstctrl_dss() { return _reg32(cm_base_.get_virt(), 0xe48); }
+  uint32_t volatile& r_clkstst_dss() { return _reg32(cm_base_.get_virt(), 0xe4c); }
 
-  uint32_t volatile& r_fclken_cam() { return _reg32(cm_base_, 0xf00); }
-  uint32_t volatile& r_iclken_cam() { return _reg32(cm_base_, 0xf10); }
-  uint32_t volatile& r_idlest_cam() { return _reg32(cm_base_, 0xf20); }
-  uint32_t volatile& r_autoidle_cam() { return _reg32(cm_base_, 0xf30); }
-  uint32_t volatile& r_clksel_cam() { return _reg32(cm_base_, 0xf40); }
-  uint32_t volatile& r_sleepdep_cam() { return _reg32(cm_base_, 0xf44); }
-  uint32_t volatile& r_clkstctrl_cam() { return _reg32(cm_base_, 0xf48); }
-  uint32_t volatile& r_clkstst_cam() { return _reg32(cm_base_, 0xf4c); }
+  uint32_t volatile& r_fclken_cam() { return _reg32(cm_base_.get_virt(), 0xf00); }
+  uint32_t volatile& r_iclken_cam() { return _reg32(cm_base_.get_virt(), 0xf10); }
+  uint32_t volatile& r_idlest_cam() { return _reg32(cm_base_.get_virt(), 0xf20); }
+  uint32_t volatile& r_autoidle_cam() { return _reg32(cm_base_.get_virt(), 0xf30); }
+  uint32_t volatile& r_clksel_cam() { return _reg32(cm_base_.get_virt(), 0xf40); }
+  uint32_t volatile& r_sleepdep_cam() { return _reg32(cm_base_.get_virt(), 0xf44); }
+  uint32_t volatile& r_clkstctrl_cam() { return _reg32(cm_base_.get_virt(), 0xf48); }
+  uint32_t volatile& r_clkstst_cam() { return _reg32(cm_base_.get_virt(), 0xf4c); }
 
-  uint32_t volatile& r_fclken_per() { return _reg32(cm_base_, 0x1000); }
-  uint32_t volatile& r_iclken_per() { return _reg32(cm_base_, 0x1010); }
-  uint32_t volatile& r_idlest_per() { return _reg32(cm_base_, 0x1020); }
-  uint32_t volatile& r_autoidle_per() { return _reg32(cm_base_, 0x1030); }
-  uint32_t volatile& r_clksel_per() { return _reg32(cm_base_, 0x1040); }
-  uint32_t volatile& r_sleepdep_per() { return _reg32(cm_base_, 0x1044); }
-  uint32_t volatile& r_clkstctrl_per() { return _reg32(cm_base_, 0x1048); }
-  uint32_t volatile& r_clkstst_per() { return _reg32(cm_base_, 0x104c); }
+  uint32_t volatile& r_fclken_per() { return _reg32(cm_base_.get_virt(), 0x1000); }
+  uint32_t volatile& r_iclken_per() { return _reg32(cm_base_.get_virt(), 0x1010); }
+  uint32_t volatile& r_idlest_per() { return _reg32(cm_base_.get_virt(), 0x1020); }
+  uint32_t volatile& r_autoidle_per() { return _reg32(cm_base_.get_virt(), 0x1030); }
+  uint32_t volatile& r_clksel_per() { return _reg32(cm_base_.get_virt(), 0x1040); }
+  uint32_t volatile& r_sleepdep_per() { return _reg32(cm_base_.get_virt(), 0x1044); }
+  uint32_t volatile& r_clkstctrl_per() { return _reg32(cm_base_.get_virt(), 0x1048); }
+  uint32_t volatile& r_clkstst_per() { return _reg32(cm_base_.get_virt(), 0x104c); }
 
-  uint32_t volatile& r_idlest_neon() { return _reg32(cm_base_, 0x1320); }
-  uint32_t volatile& r_clkstctrl_neon() { return _reg32(cm_base_, 0x1348); }
+  uint32_t volatile& r_idlest_neon() { return _reg32(cm_base_.get_virt(), 0x1320); }
+  uint32_t volatile& r_clkstctrl_neon() { return _reg32(cm_base_.get_virt(), 0x1348); }
 
-  uint32_t volatile& r_fclken_usbhost() { return _reg32(cm_base_, 0x1400); }
-  uint32_t volatile& r_iclken_usbhost() { return _reg32(cm_base_, 0x1410); }
-  uint32_t volatile& r_idlest_usbhost() { return _reg32(cm_base_, 0x1420); }
-  uint32_t volatile& r_autoidle_usbhost() { return _reg32(cm_base_, 0x1430); }
-  uint32_t volatile& r_clksel_usbhost() { return _reg32(cm_base_, 0x1440); }
-  uint32_t volatile& r_sleepdep_usbhost() { return _reg32(cm_base_, 0x1444); }
-  uint32_t volatile& r_clkstctrl_usbhost() { return _reg32(cm_base_, 0x1448); }
-  uint32_t volatile& r_clkstst_usbhost() { return _reg32(cm_base_, 0x144c); }
+  uint32_t volatile& r_fclken_usbhost() { return _reg32(cm_base_.get_virt(), 0x1400); }
+  uint32_t volatile& r_iclken_usbhost() { return _reg32(cm_base_.get_virt(), 0x1410); }
+  uint32_t volatile& r_idlest_usbhost() { return _reg32(cm_base_.get_virt(), 0x1420); }
+  uint32_t volatile& r_autoidle_usbhost() { return _reg32(cm_base_.get_virt(), 0x1430); }
+  uint32_t volatile& r_clksel_usbhost() { return _reg32(cm_base_.get_virt(), 0x1440); }
+  uint32_t volatile& r_sleepdep_usbhost() { return _reg32(cm_base_.get_virt(), 0x1444); }
+  uint32_t volatile& r_clkstctrl_usbhost() { return _reg32(cm_base_.get_virt(), 0x1448); }
+  uint32_t volatile& r_clkstst_usbhost() { return _reg32(cm_base_.get_virt(), 0x144c); }
 
   // PM registers
-  uint32_t volatile& r_rstctrl_iva2() {return _reg32(pm_base_, 0x50); }
-  uint32_t volatile& r_rstst_iva2() {return _reg32(pm_base_, 0x58); }
-  uint32_t volatile& r_wkdep_iva2() {return _reg32(pm_base_, 0xc8); }
-  uint32_t volatile& r_pwstctrl_iva2() {return _reg32(pm_base_, 0xe0); }
-  uint32_t volatile& r_pwstst_iva2() {return _reg32(pm_base_, 0xe4); }
-  uint32_t volatile& r_prepwstst_iva2() {return _reg32(pm_base_, 0xe8); }
-  uint32_t volatile& r_irqstatus_iva2() {return _reg32(pm_base_, 0xf8); }
-  uint32_t volatile& r_irqenable_iva2() {return _reg32(pm_base_, 0xfc); }
+  uint32_t volatile& r_rstctrl_iva2() {return _reg32(pm_base_.get_virt(), 0x50); }
+  uint32_t volatile& r_rstst_iva2() {return _reg32(pm_base_.get_virt(), 0x58); }
+  uint32_t volatile& r_wkdep_iva2() {return _reg32(pm_base_.get_virt(), 0xc8); }
+  uint32_t volatile& r_pwstctrl_iva2() {return _reg32(pm_base_.get_virt(), 0xe0); }
+  uint32_t volatile& r_pwstst_iva2() {return _reg32(pm_base_.get_virt(), 0xe4); }
+  uint32_t volatile& r_prepwstst_iva2() {return _reg32(pm_base_.get_virt(), 0xe8); }
+  uint32_t volatile& r_irqstatus_iva2() {return _reg32(pm_base_.get_virt(), 0xf8); }
+  uint32_t volatile& r_irqenable_iva2() {return _reg32(pm_base_.get_virt(), 0xfc); }
 
-  uint32_t volatile& r_rstst_mpu() {return _reg32(pm_base_, 0x958); }
-  uint32_t volatile& r_wkdep_mpu() {return _reg32(pm_base_, 0x9c8); }
-  uint32_t volatile& r_evgenctrl_mpu() {return _reg32(pm_base_, 0x9d4); }
-  uint32_t volatile& r_evgenontim_mpu() {return _reg32(pm_base_, 0x9d8); }
-  uint32_t volatile& r_evgenofftim_mpu() {return _reg32(pm_base_, 0x9dc); }
-  uint32_t volatile& r_pwstctrl_mpu() {return _reg32(pm_base_, 0x9e0); }
-  uint32_t volatile& r_pwstst_mpu() {return _reg32(pm_base_, 0x9e4); }
-  uint32_t volatile& r_prepwstst_mpu() {return _reg32(pm_base_, 0x9e8); }
+  uint32_t volatile& r_rstst_mpu() {return _reg32(pm_base_.get_virt(), 0x958); }
+  uint32_t volatile& r_wkdep_mpu() {return _reg32(pm_base_.get_virt(), 0x9c8); }
+  uint32_t volatile& r_evgenctrl_mpu() {return _reg32(pm_base_.get_virt(), 0x9d4); }
+  uint32_t volatile& r_evgenontim_mpu() {return _reg32(pm_base_.get_virt(), 0x9d8); }
+  uint32_t volatile& r_evgenofftim_mpu() {return _reg32(pm_base_.get_virt(), 0x9dc); }
+  uint32_t volatile& r_pwstctrl_mpu() {return _reg32(pm_base_.get_virt(), 0x9e0); }
+  uint32_t volatile& r_pwstst_mpu() {return _reg32(pm_base_.get_virt(), 0x9e4); }
+  uint32_t volatile& r_prepwstst_mpu() {return _reg32(pm_base_.get_virt(), 0x9e8); }
 
-  uint32_t volatile& r_rstst_core() {return _reg32(pm_base_, 0xa58); }
-  uint32_t volatile& r_wken1_core() {return _reg32(pm_base_, 0xaa0); }
-  uint32_t volatile& r_mpugrpsel1_core() {return _reg32(pm_base_, 0xaa4); }
-  uint32_t volatile& r_iva2grpsel1_core() {return _reg32(pm_base_, 0xaa8); }
-  uint32_t volatile& r_wkst1_core() {return _reg32(pm_base_, 0xab0); }
-  uint32_t volatile& r_wkst3_core() {return _reg32(pm_base_, 0xab8); }
-  uint32_t volatile& r_pwstctrl_core() {return _reg32(pm_base_, 0xae0); }
-  uint32_t volatile& r_pwstst_core() {return _reg32(pm_base_, 0xae4); }
-  uint32_t volatile& r_prepwstst_core() {return _reg32(pm_base_, 0xae8); }
-  uint32_t volatile& r_wken3_core() {return _reg32(pm_base_, 0xaf0); }
-  uint32_t volatile& r_iva2grpsel2_core() {return _reg32(pm_base_, 0xaf4); }
-  uint32_t volatile& r_mpugrpsel2_core() {return _reg32(pm_base_, 0xaf8); }
+  uint32_t volatile& r_rstst_core() {return _reg32(pm_base_.get_virt(), 0xa58); }
+  uint32_t volatile& r_wken1_core() {return _reg32(pm_base_.get_virt(), 0xaa0); }
+  uint32_t volatile& r_mpugrpsel1_core() {return _reg32(pm_base_.get_virt(), 0xaa4); }
+  uint32_t volatile& r_iva2grpsel1_core() {return _reg32(pm_base_.get_virt(), 0xaa8); }
+  uint32_t volatile& r_wkst1_core() {return _reg32(pm_base_.get_virt(), 0xab0); }
+  uint32_t volatile& r_wkst3_core() {return _reg32(pm_base_.get_virt(), 0xab8); }
+  uint32_t volatile& r_pwstctrl_core() {return _reg32(pm_base_.get_virt(), 0xae0); }
+  uint32_t volatile& r_pwstst_core() {return _reg32(pm_base_.get_virt(), 0xae4); }
+  uint32_t volatile& r_prepwstst_core() {return _reg32(pm_base_.get_virt(), 0xae8); }
+  uint32_t volatile& r_wken3_core() {return _reg32(pm_base_.get_virt(), 0xaf0); }
+  uint32_t volatile& r_iva2grpsel2_core() {return _reg32(pm_base_.get_virt(), 0xaf4); }
+  uint32_t volatile& r_mpugrpsel2_core() {return _reg32(pm_base_.get_virt(), 0xaf8); }
 
-  uint32_t volatile& r_rstst_sgx() {return _reg32(pm_base_, 0xb58); }
-  uint32_t volatile& r_wkdep_sgx() {return _reg32(pm_base_, 0xbc8); }
-  uint32_t volatile& r_pwstctrl_sgx() {return _reg32(pm_base_, 0xbe0); }
-  uint32_t volatile& r_pwstst_sgx() {return _reg32(pm_base_, 0xbe4); }
-  uint32_t volatile& r_prepwstst_sgx() {return _reg32(pm_base_, 0xbe8); }
+  uint32_t volatile& r_rstst_sgx() {return _reg32(pm_base_.get_virt(), 0xb58); }
+  uint32_t volatile& r_wkdep_sgx() {return _reg32(pm_base_.get_virt(), 0xbc8); }
+  uint32_t volatile& r_pwstctrl_sgx() {return _reg32(pm_base_.get_virt(), 0xbe0); }
+  uint32_t volatile& r_pwstst_sgx() {return _reg32(pm_base_.get_virt(), 0xbe4); }
+  uint32_t volatile& r_prepwstst_sgx() {return _reg32(pm_base_.get_virt(), 0xbe8); }
 
-  uint32_t volatile& r_wken_wkup() {return _reg32(pm_base_, 0xca0); }
-  uint32_t volatile& r_mpugrpsel_wkup() {return _reg32(pm_base_, 0xca4); }
-  uint32_t volatile& r_iva2grpsel_wkup() {return _reg32(pm_base_, 0xca8); }
-  uint32_t volatile& r_wkst_wkup() {return _reg32(pm_base_, 0xcb0); }
+  uint32_t volatile& r_wken_wkup() {return _reg32(pm_base_.get_virt(), 0xca0); }
+  uint32_t volatile& r_mpugrpsel_wkup() {return _reg32(pm_base_.get_virt(), 0xca4); }
+  uint32_t volatile& r_iva2grpsel_wkup() {return _reg32(pm_base_.get_virt(), 0xca8); }
+  uint32_t volatile& r_wkst_wkup() {return _reg32(pm_base_.get_virt(), 0xcb0); }
 
-  uint32_t volatile& r_rstst_dss() {return _reg32(pm_base_, 0xe58); }
-  uint32_t volatile& r_wken_dss() {return _reg32(pm_base_, 0xec8); }
-  uint32_t volatile& r_wkdep_dss() {return _reg32(pm_base_, 0xec8); }
-  uint32_t volatile& r_pwstctrl_dss() {return _reg32(pm_base_, 0xee0); }
-  uint32_t volatile& r_pwstst_dss() {return _reg32(pm_base_, 0xee4); }
-  uint32_t volatile& r_prepwstst_dss() {return _reg32(pm_base_, 0xee8); }
+  uint32_t volatile& r_rstst_dss() {return _reg32(pm_base_.get_virt(), 0xe58); }
+  uint32_t volatile& r_wken_dss() {return _reg32(pm_base_.get_virt(), 0xea0); } // WKEN sits at +0xa0 as for wkup/per/usbhost; 0xec8 is r_wkdep_dss
+  uint32_t volatile& r_wkdep_dss() {return _reg32(pm_base_.get_virt(), 0xec8); }
+  uint32_t volatile& r_pwstctrl_dss() {return _reg32(pm_base_.get_virt(), 0xee0); }
+  uint32_t volatile& r_pwstst_dss() {return _reg32(pm_base_.get_virt(), 0xee4); }
+  uint32_t volatile& r_prepwstst_dss() {return _reg32(pm_base_.get_virt(), 0xee8); }
 
-  uint32_t volatile& r_rstst_cam() {return _reg32(pm_base_, 0xf58); }
-  uint32_t volatile& r_wkdep_cam() {return _reg32(pm_base_, 0xfc8); }
-  uint32_t volatile& r_pwstctrl_cam() {return _reg32(pm_base_, 0xfe0); }
-  uint32_t volatile& r_pwstst_cam() {return _reg32(pm_base_, 0xfe4); }
-  uint32_t volatile& r_prepwstst_cam() {return _reg32(pm_base_, 0xfe8); }
+  uint32_t volatile& r_rstst_cam() {return _reg32(pm_base_.get_virt(), 0xf58); }
+  uint32_t volatile& r_wkdep_cam() {return _reg32(pm_base_.get_virt(), 0xfc8); }
+  uint32_t volatile& r_pwstctrl_cam() {return _reg32(pm_base_.get_virt(), 0xfe0); }
+  uint32_t volatile& r_pwstst_cam() {return _reg32(pm_base_.get_virt(), 0xfe4); }
+  uint32_t volatile& r_prepwstst_cam() {return _reg32(pm_base_.get_virt(), 0xfe8); }
 
-  uint32_t volatile& r_rstst_per() {return _reg32(pm_base_, 0x1058); }
-  uint32_t volatile& r_wken_per() {return _reg32(pm_base_, 0x10a0); }
-  uint32_t volatile& r_mpugrpsel_per() {return _reg32(pm_base_, 0x10a4); }
-  uint32_t volatile& r_iva2grpsel_per() {return _reg32(pm_base_, 0x10a8); }
-  uint32_t volatile& r_wkst_per() {return _reg32(pm_base_, 0x10b0); }
-  uint32_t volatile& r_wkdep_per() {return _reg32(pm_base_, 0x10c8); }
-  uint32_t volatile& r_pwstctrl_per() {return _reg32(pm_base_, 0x10e0); }
-  uint32_t volatile& r_pwstst_per() {return _reg32(pm_base_, 0x10e4); }
-  uint32_t volatile& r_prepwstst_per() {return _reg32(pm_base_, 0x10e8); }
+  uint32_t volatile& r_rstst_per() {return _reg32(pm_base_.get_virt(), 0x1058); }
+  uint32_t volatile& r_wken_per() {return _reg32(pm_base_.get_virt(), 0x10a0); }
+  uint32_t volatile& r_mpugrpsel_per() {return _reg32(pm_base_.get_virt(), 0x10a4); }
+  uint32_t volatile& r_iva2grpsel_per() {return _reg32(pm_base_.get_virt(), 0x10a8); }
+  uint32_t volatile& r_wkst_per() {return _reg32(pm_base_.get_virt(), 0x10b0); }
+  uint32_t volatile& r_wkdep_per() {return _reg32(pm_base_.get_virt(), 0x10c8); }
+  uint32_t volatile& r_pwstctrl_per() {return _reg32(pm_base_.get_virt(), 0x10e0); }
+  uint32_t volatile& r_pwstst_per() {return _reg32(pm_base_.get_virt(), 0x10e4); }
+  uint32_t volatile& r_prepwstst_per() {return _reg32(pm_base_.get_virt(), 0x10e8); }
   
-  uint32_t volatile& r_rstst_neon() {return _reg32(pm_base_, 0x1358); }
-  uint32_t volatile& r_wkdep_neon() {return _reg32(pm_base_, 0x13c8); }
-  uint32_t volatile& r_pwstctrl_neon() {return _reg32(pm_base_, 0x13e0); }
-  uint32_t volatile& r_pwstst_neon() {return _reg32(pm_base_, 0x13e4); }
-  uint32_t volatile& r_prepwstst_neon() {return _reg32(pm_base_, 0x13e8); }
+  uint32_t volatile& r_rstst_neon() {return _reg32(pm_base_.get_virt(), 0x1358); }
+  uint32_t volatile& r_wkdep_neon() {return _reg32(pm_base_.get_virt(), 0x13c8); }
+  uint32_t volatile& r_pwstctrl_neon() {return _reg32(pm_base_.get_virt(), 0x13e0); }
+  uint32_t volatile& r_pwstst_neon() {return _reg32(pm_base_.get_virt(), 0x13e4); }
+  uint32_t volatile& r_prepwstst_neon() {return _reg32(pm_base_.get_virt(), 0x13e8); }
 
-  uint32_t volatile& r_rstst_usbhost() {return _reg32(pm_base_, 0x1458); }
-  uint32_t volatile& r_wken_usbhost() {return _reg32(pm_base_, 0x14a0); }
-  uint32_t volatile& r_mpugrpsel_usbhost() {return _reg32(pm_base_, 0x14a4); }
-  uint32_t volatile& r_iva2grpsel_usbhost() {return _reg32(pm_base_, 0x14a8); }
-  uint32_t volatile& r_wkst_usbhost() {return _reg32(pm_base_, 0x14b0); }
-  uint32_t volatile& r_wkdep_usbhost() {return _reg32(pm_base_, 0x14c8); }
-  uint32_t volatile& r_pwstctrl_usbhost() {return _reg32(pm_base_, 0x14e0); }
-  uint32_t volatile& r_pwstst_usbhost() {return _reg32(pm_base_, 0x14e4); }
-  uint32_t volatile& r_prepwstst_usbhost() {return _reg32(pm_base_, 0x14e8); }
+  uint32_t volatile& r_rstst_usbhost() {return _reg32(pm_base_.get_virt(), 0x1458); }
+  uint32_t volatile& r_wken_usbhost() {return _reg32(pm_base_.get_virt(), 0x14a0); }
+  uint32_t volatile& r_mpugrpsel_usbhost() {return _reg32(pm_base_.get_virt(), 0x14a4); }
+  uint32_t volatile& r_iva2grpsel_usbhost() {return _reg32(pm_base_.get_virt(), 0x14a8); }
+  uint32_t volatile& r_wkst_usbhost() {return _reg32(pm_base_.get_virt(), 0x14b0); }
+  uint32_t volatile& r_wkdep_usbhost() {return _reg32(pm_base_.get_virt(), 0x14c8); }
+  uint32_t volatile& r_pwstctrl_usbhost() {return _reg32(pm_base_.get_virt(), 0x14e0); }
+  uint32_t volatile& r_pwstst_usbhost() {return _reg32(pm_base_.get_virt(), 0x14e4); }
+  uint32_t volatile& r_prepwstst_usbhost() {return _reg32(pm_base_.get_virt(), 0x14e8); }
 };
 
 
@@ -258,3 +265,7 @@ OMAP35x_prcm::~OMAP35x_prcm() {
 void OMAP35x_prcm::enable_peripherals() {
   impl_->enable_peripherals();
 }
+
+void OMAP35x_prcm::clear_wake_per(int n) {
+  impl_->clear_wake_per(n);
+}
diff --git a/omap35x_prcm.hh b/omap35x_prcm.hh
index 9f6fd9a..ea58ee7 100644
--- a/omap35x_prcm.hh
+++ b/omap35x_prcm.hh
@@ -12,6 +12,8 @@ public:
 
   // Enable clock&power for all used peripherals
   void enable_peripherals();
+  
+  void clear_wake_per(int n);
 
 private:
   std::unique_ptr<OMAP35x_prcm_impl> impl_;
diff --git a/phys_mm.cc b/phys_mm.cc
new file mode 100644
index 0000000..47e59bd
--- /dev/null
+++ b/phys_mm.cc
@@ -0,0 +1,293 @@
+#include <cstdint>
+#include <cassert>
+#include <array>
+#include <algorithm>
+
+#include "util.hh"
+#include "phys_mm.hh"
+
+static constexpr unsigned phys_pages = 65536; // for 256 MiB RAM
+static constexpr unsigned size_steps = 17; // log2(256Mi)-log2(4Ki)+1
+
+using idx_t = uint16_t; // Must have a range of 0..phys_pages-1
+
+struct pb_t { // per-page bookkeeping entry; the entry at a block's first page describes that block
+  idx_t next_free_by_size; // index into phys_blocks of the next entry on the same free list; only meaningful while nbs_valid is set
+  uint8_t size_ln2; // the block spans _pow2(size_ln2) pages
+  unsigned used:1; // set by _set_used(), cleared by _free_at() and for the upper half created by _split()
+  unsigned nbs_valid:1; // set while next_free_by_size holds a valid link (checked by _next_by_size())
+} __attribute__((packed));
+
+static pb_t phys_blocks[phys_pages];
+
+static unsigned size_starts[size_steps];
+
+// Import symbols from linker
+extern uint32_t _kernel_real_pt_end;
+
+static unsigned _idx(pb_t const& pb) __attribute__((const));
+static unsigned _idx(pb_t const& pb) {
+  return static_cast<unsigned>(&pb - &phys_blocks[0]); // position of pb inside phys_blocks
+}
+
+static pb_t& _next_by_pos(pb_t const& pb) __attribute__((pure));
+static pb_t& _next_by_pos(pb_t const& pb) {
+  // Index of the entry that starts right behind pb's _pow2(size_ln2) pages
+  auto const succ = _idx(pb)+_pow2(pb.size_ln2);
+  assert(succ < phys_pages);
+  return phys_blocks[succ];
+}
+
+static bool _is_last_pos(pb_t const& pb) __attribute__((pure));
+static bool _is_last_pos(pb_t const& pb) {
+  // True when no further block starts behind pb inside phys_blocks
+  return !(_idx(pb)+_pow2(pb.size_ln2) < phys_pages);
+}
+
+// // Caution: Slow, performs linear search
+// static pb_t& _prev_by_pos(pb_t const& pb) __attribute__((pure));
+// static pb_t& _prev_by_pos(pb_t const& pb) {
+//   auto idx = _idx(pb);
+//   assert(idx != 0);
+
+//   unsigned i = 0, prev_i;
+//   while(i < idx) {
+//     prev_i = i;
+//     i = i + _pow2(phys_blocks[i].size_ln2);
+//   }
+//   assert(i == idx);
+//   return phys_blocks[prev_i];
+// }
+
+static pb_t& _next_by_size(pb_t const& pb) __attribute__((pure));
+static pb_t& _next_by_size(pb_t const& pb) {
+  assert(pb.nbs_valid); // the link is only meaningful while nbs_valid is set
+  idx_t const succ = pb.next_free_by_size;
+  return phys_blocks[succ];
+}
+
+
+static void _ll_insert(unsigned& head, pb_t& elem) {
+  // Push elem onto the front of a free list; head == phys_pages means "empty"
+  bool const had_entries = (head != phys_pages);
+  if(had_entries)
+    elem.next_free_by_size = head; // old first entry becomes elem's successor
+  elem.nbs_valid = had_entries;
+
+  head = _idx(elem);
+}
+
+static void _ll_remove(unsigned& head, pb_t& elem) {
+  // Unlink elem from the singly linked free list that starts at head.
+  // The list must not be empty (head == phys_pages marks an empty list).
+  assert(head != phys_pages);
+
+  if(head != _idx(elem)) {
+    // elem is somewhere behind the first entry: walk the list until the
+    // entry whose successor is elem is found (linear search)
+    pb_t* pred = &phys_blocks[head];
+    for(pb_t* succ = &_next_by_size(*pred); succ != &elem;
+        succ = &_next_by_size(*pred))
+      pred = succ;
+
+    // Let the predecessor take over elem's link, valid flag included
+    pred->next_free_by_size = elem.next_free_by_size;
+    pred->nbs_valid = elem.nbs_valid;
+  } else {
+    // elem is the first entry: its successor (if any) becomes the new head
+    head = elem.nbs_valid ? elem.next_free_by_size : phys_pages;
+  }
+
+  elem.nbs_valid = false;
+}
+
+// Returns true if block is the left half of the next larger
+// allocation block, false if it is the right half
+static bool _is_left(pb_t& block) __attribute__((pure));
+static bool _is_left(pb_t& block) {
+  // The left half has the bit worth the block's own size cleared in
+  // its page index; in the right half that bit is set
+  return (_idx(block) & _pow2(block.size_ln2)) == 0;
+}
+
+// Gets the left half of the allocation block of which
+// block is the right half. Only valid if _is_left(block)
+// would be false
+static pb_t& _get_left(pb_t& block) __attribute__((pure));
+static pb_t& _get_left(pb_t& block) {
+  return phys_blocks[_idx(block)-_pow2(block.size_ln2)]; // entry one block size before block; no range check is done here
+}
+
+static void _split(pb_t& block) {
+  assert(block.size_ln2 > 0);
+  assert(!block.used);
+
+  // Halving: the block leaves the free list of its current size ...
+  _ll_remove(size_starts[block.size_ln2], block);
+  // ... shrinks, so that its upper half becomes a free block of its own ...
+  uint8_t const half = block.size_ln2 - 1;
+  block.size_ln2 = half;
+  pb_t& upper = _next_by_pos(block);
+  upper.size_ln2 = half;
+  upper.used = false;
+
+  // ... and both halves are put onto the free list one size below
+  _ll_insert(size_starts[half], block);
+  _ll_insert(size_starts[half], upper);
+}
+
+static void _set_used(pb_t& block) {
+  unsigned& free_list = size_starts[block.size_ln2];
+  _ll_remove(free_list, block); // a used block must not stay on a free list
+  block.used = true;
+}
+
+static void _alloc_at(unsigned idx, unsigned size) {
+  // Carve a block of size class 'size' out of the free block starting at page idx
+  pb_t& target = phys_blocks[idx];
+  assert(target.size_ln2 >= size);
+  assert(!target.used);
+
+  // Every split halves the block in place and frees its upper half
+  for(; target.size_ln2 > size; _split(target)) {}
+
+  _set_used(target);
+}
+
+static unsigned _find_free(unsigned size) {
+  // Smallest sufficient size class first; phys_pages marks an empty list
+  unsigned step = size;
+  while(step < size_steps && size_starts[step] == phys_pages)
+    ++step;
+
+  return (step < size_steps) ? size_starts[step] : phys_pages;
+}
+
+// Recursively merge block with its free, equally sized buddy; returns the resulting block
+static pb_t& _merge(pb_t& block) {
+  if(block.size_ln2 == size_steps-1) // already the largest size class, nothing to merge with
+    return block;
+
+  if(_is_left(block) && !_next_by_pos(block).used &&
+     (_next_by_pos(block).size_ln2 == block.size_ln2)) {
+    // Remove both halves from the free list for the old size
+    _ll_remove(size_starts[block.size_ln2], block);
+    _ll_remove(size_starts[block.size_ln2], _next_by_pos(block));
+    
+    // Merge: the left half grows by one size class and so covers the right half
+    ++block.size_ln2;
+    
+    // Add to free list for new size
+    _ll_insert(size_starts[block.size_ln2], block);
+
+    return _merge(block);
+  } else if (!_is_left(block) && !_get_left(block).used &&
+	     (_get_left(block).size_ln2 == block.size_ln2)) {
+    // Remove both halves from the free list for the old size
+    _ll_remove(size_starts[block.size_ln2], block);
+    _ll_remove(size_starts[block.size_ln2], _get_left(block));
+
+    // Merge: the left half grows by one size class and so covers block
+    auto& left = _get_left(block);
+    ++left.size_ln2;
+
+    // Add to free list for new size
+    _ll_insert(size_starts[left.size_ln2], left);
+
+    return _merge(left);
+  }
+
+  return block; // buddy is used or has a different size: no merge
+}
+    
+ 
+static void _free_at(unsigned idx) {
+  // Release the used block that starts at page index idx
+  pb_t& released = phys_blocks[idx];
+  assert(released.used);
+  // Mark it free, queue it under its size, then coalesce with free buddies
+  released.used = false;
+  unsigned& free_list = size_starts[released.size_ln2];
+  _ll_insert(free_list, released);
+  _merge(released);
+}
+  
+static void _print_elem(pb_t const& block) {
+  // Output format: " {<index>: <next free index or -> <size_ln2>[ U]}"
+  unsigned const pos = _idx(block);
+  assert(pos < phys_pages);
+  printf(" {%u: ", pos);
+  if(!block.nbs_valid)
+    printf("- ");
+  else
+    printf("%u ", block.next_free_by_size);
+  printf("%u%s}", block.size_ln2, block.used ? " U" : "");
+}
+
+void phys_mm::init() {
+  // Every free list starts out empty (marked by phys_pages) ...
+  for(unsigned& head : size_starts)
+    head = phys_pages;
+  // ... except the largest one, which holds a single block spanning all pages
+  pb_t& whole = phys_blocks[0];
+  whole.size_ln2 = size_steps-1;
+  whole.used = false;
+  whole.nbs_valid = false;
+  size_starts[size_steps-1] = 0;
+
+  // Reserve the kernel: bytes from 0x80000000 up to the linker symbol, in whole pages
+  uintptr_t const kernel_bytes = (uintptr_t)&_kernel_real_pt_end - 0x80000000;
+  unsigned const kernel_pages = kernel_bytes/4096 + (kernel_bytes%4096 != 0 ? 1 : 0);
+  _alloc_at(0, _ln2(kernel_pages));
+}
+
+uintptr_t phys_mm::alloc(unsigned count) {
+  // Size class for 'count' pages as computed by _ln2 (NOTE(review): rounding of _ln2 not visible here - confirm)
+  unsigned const size = _ln2(count);
+  unsigned const first_page = _find_free(size);
+  if(first_page == phys_pages)
+    throw bad_alloc{}; // no free block of that size class or larger
+  _alloc_at(first_page, size);
+  return 0x80000000+4096*first_page;
+}
+
+void phys_mm::free(uintptr_t base) { // releases the used block whose first page is at address 'base'
+  assert(base >= 0x80000000 && (base-0x80000000)/4096 < phys_pages); // otherwise phys_blocks would be indexed out of bounds
+  _free_at((base-0x80000000)/4096);
+}
+
+void phys_mm::print_state() {
+  // Part 1: one output line per size class listing the blocks on its free list
+  printf("Free lists:\n");
+  for(unsigned step = 0; step != size_steps; ++step) {
+    printf("\t%u:", step);
+
+    unsigned const head = size_starts[step];
+    if(head != phys_pages) {
+      // Follow the next_free_by_size links until an entry without a valid link
+      pb_t const* entry = &phys_blocks[head];
+      _print_elem(*entry);
+      while(entry->nbs_valid) {
+        entry = &_next_by_size(*entry);
+        _print_elem(*entry);
+      }
+    }
+    printf("\n");
+  }
+
+  // Part 2: all blocks in address order, starting with the one at page 0
+  printf("Blocks:\n");
+  pb_t const* block = phys_blocks;
+  _print_elem(*block);
+  while(!_is_last_pos(*block)) {
+    block = &_next_by_pos(*block);
+    _print_elem(*block);
+  }
+
+  printf("\n\n");
+}
+
+uintptr_t phys_mm::get_end_of_kernel_alloc() {
+  // The block at page 0 is the one init() allocates; its successor is the first page behind it
+  return 0x80000000+4096*_idx(_next_by_pos(phys_blocks[0]));
+}
diff --git a/phys_mm.hh b/phys_mm.hh
new file mode 100644
index 0000000..70b4df1
--- /dev/null
+++ b/phys_mm.hh
@@ -0,0 +1,31 @@
+#ifndef _PHYS_MM_HH_
+#define _PHYS_MM_HH_
+
+#include <cstdint>
+
+#include "exceptions.hh"
+
+namespace phys_mm {
+  /* Initialize the physical memory management.
+     Initializes internal data structures and
+     marks the RAM containing the kernel image as used */
+  void init();
+
+  // Allocate 'count' consecutive pages of physical memory and return the
+  // physical address of the first page. Throws phys_mm::bad_alloc on failure.
+  uintptr_t alloc(unsigned count);
+
+  // Free the block of physical pages starting at physical address 'base'
+  void free(uintptr_t base);
+
+  void print_state(); // Print the free lists and the block list via printf
+
+  // Returns the end of the initial physical allocation containing kernel image and data
+  // For use by mm::init
+  uintptr_t get_end_of_kernel_alloc();
+
+  class bad_alloc : public ex::bad_alloc {}; // Thrown by alloc()
+}
+
+
+#endif
diff --git a/syscall.c b/syscall.c
index ba84b40..2a4804b 100644
--- a/syscall.c
+++ b/syscall.c
@@ -1,6 +1,9 @@
 #include <sys/stat.h>
 #include <stdlib.h>
 
+#include "cortexa8.hh"
+#include "mm.hh"
+
 #include <errno.h>
 #undef errno
 extern int errno;
@@ -65,22 +68,33 @@ int _write(int file, const char *ptr, int len) {
   }
 }
 
-extern char __heap_end, __heap_start;
+extern uint32_t __heap_start;
 caddr_t _sbrk(int incr) __attribute__((used));
 caddr_t _sbrk(int incr) {
-  static caddr_t heap_end = 0;
+  static uintptr_t heap_end = 0; // current heap limit; 0 means not initialised yet
+  static uintptr_t brk; // current program break
 
-  if(heap_end == 0)
-    heap_end = &__heap_start;
-
-  caddr_t prev_heap_end = heap_end;
-  if(heap_end + incr > &__heap_end) {
-    _write(1, "Out of memory\n", 14);
-    abort();
+  if(heap_end == 0) { // first call: fetch the limit from mm, start the break at __heap_start
+    heap_end = mm::get_heap_end();
+    brk = (uintptr_t)&__heap_start;
+  }
+  
+  if(brk + incr >= heap_end) { // new break would reach or pass the current limit
+    // Allocate additional RAM for heap
+    try {
+      unsigned pages = (brk+incr-heap_end+1)/4096; // bytes needed beyond the limit, in 4096-byte pages
+      if((brk+incr-heap_end+1)%4096 != 0) // round a partial page up
+	++pages;
+      heap_end = mm::grow_heap(pages); // presumably returns the new heap limit -- confirm in mm
+    } catch (ex::bad_alloc &ex) {
+      _write(1, "Heap allocation failure\n", 24);
+      abort();
+    }
   }
 
-  heap_end += incr;
-  return prev_heap_end;
+  caddr_t prev_brk = (caddr_t)brk; // the break as it was before this call is what gets returned
+  brk += incr;
+  return prev_brk;
 }
 
 int _kill(int pid, int sig) __attribute__((used));
diff --git a/uart.cc b/uart.cc
index 1424367..d731a6f 100644
--- a/uart.cc
+++ b/uart.cc
@@ -4,12 +4,14 @@
 
 #include "cortexa8.hh"
 #include "omap35x_intc.hh"
-#include "uart.hh"
+#include "omap35x_prcm.hh"
 #include "util.hh"
+#include "mmio.hh"
+#include "uart.hh"
 
 class UART_impl {
 public:
-  UART_impl(uintptr_t base, int irq) : base_(base), irq_(irq) {
+UART_impl(uintptr_t base, int irq, OMAP35x_prcm& prcm) : base_{base}, irq_(irq), prcm_(prcm) {
     OMAP35x_intc::get().register_handler(irq_, std::bind(&UART_impl::recv_handler, this), 1);
     OMAP35x_intc::get().enable_int(irq_);
 
@@ -105,59 +107,57 @@ private:
     }
     
     newdata_ = true;
-    *prcm_wkst_per = (1<<11); // Clear UART3 wake bit
+    prcm_.clear_wake_per(11);
   }
 
-  uintptr_t base_;
+  MMIO_alloc base_;
   int irq_;
+  OMAP35x_prcm& prcm_;
 
   uint8_t volatile& r_data() {
-    return _reg8(base_, 0);
+    return _reg8(base_.get_virt(), 0);
   }
 
   uint8_t volatile& r_dll() {
-    return _reg8(base_, 0);
+    return _reg8(base_.get_virt(), 0);
   }
 
   uint8_t volatile& r_dlh() {
-    return _reg8(base_, 4);
+    return _reg8(base_.get_virt(), 4);
   }
 
   uint8_t volatile& r_ier() {
-    return _reg8(base_, 4);
+    return _reg8(base_.get_virt(), 4);
   }
 
   uint8_t volatile& r_fcr() {
-    return _reg8(base_, 8);
+    return _reg8(base_.get_virt(), 8);
   }
 
   uint8_t volatile& r_efr() {
-    return _reg8(base_, 8);
+    return _reg8(base_.get_virt(), 8);
   }
 
   uint8_t volatile& r_lcr() {
-    return _reg8(base_, 0xc);
+    return _reg8(base_.get_virt(), 0xc);
   }
 
   uint8_t volatile& r_lsr() {
-    return _reg8(base_, 0x14);
+    return _reg8(base_.get_virt(), 0x14);
   }
 
   uint8_t volatile& r_ssr() {
-    return _reg8(base_, 0x44);
+    return _reg8(base_.get_virt(), 0x44);
   }
 
   uint8_t volatile& r_sysc() {
-    return _reg8(base_, 0x54);
+    return _reg8(base_.get_virt(), 0x54);
   }
 
   uint8_t volatile& r_wer() {
-    return _reg8(base_, 0x5c);
+    return _reg8(base_.get_virt(), 0x5c);
   }
 
-  volatile uint32_t *const prcm_wkst_per = (uint32_t*)0x483070b0;
-
-
   static const size_t RECVBUFFERSIZE = 128;
   std::array<char, RECVBUFFERSIZE> recvbuffer_;
   volatile size_t recvbuffer_rdptr_ = (RECVBUFFERSIZE-1), recvbuffer_wrptr_ = 0;
@@ -184,7 +184,7 @@ private:
   }
 };
 
-UART::UART(uintptr_t base, int irq) : impl_(new UART_impl(base, irq)) {
+UART::UART(uintptr_t base, int irq, OMAP35x_prcm& prcm) : impl_(new UART_impl(base, irq, prcm)) {
 }
 
 UART::~UART() {
@@ -197,3 +197,48 @@ void UART::write(const char *data, int const& len) {
 int UART::read(char *buf, int const& len) {
   return impl_->read(buf, len);
 }
+
+
+// Transmit 'len' bytes starting at 'data', one blocking sendb() call per byte
+void EarlyUART::write(char const* data, int const& len) {
+  for(int sent = 0;sent < len;++sent) sendb(data[sent]);
+}
+
+int EarlyUART::read(char *buf, int const& len) { // blocking, echoes, stores at most 1 char
+  if(len < 1) return 0; // no room in 'buf': consume and store nothing
+  char rd = recvb(); // wait for data instead of reading a possibly empty receiver
+  if(rd == '\r') rd = '\n'; // hand a carriage return to the caller as newline
+  sendb(rd); // echo what was received
+  buf[0] = rd;
+  return 1;
+}
+
+uint8_t volatile& EarlyUART::r_data() { // data register: read to receive, write to send
+  return _reg8(base_, 0);
+}
+
+uint8_t volatile& EarlyUART::r_lsr() { // status register; _wait_rxnotempty polls its bit 0
+  return _reg8(base_, 0x14);
+}
+
+uint8_t volatile& EarlyUART::r_ssr() { // status register; _wait_txnotfull polls its bit 0
+  return _reg8(base_, 0x44);
+}
+
+void EarlyUART::_wait_txnotfull() { // busy-wait for as long as SSR bit 0 is set
+  while((r_ssr() & 0x1) != 0) {}
+}
+
+void EarlyUART::_wait_rxnotempty() { // busy-wait for as long as LSR bit 0 is clear
+  while((r_lsr() & 0x1) == 0) {}
+}
+
+void EarlyUART::sendb(char b) { // blocking transmit of one byte
+  _wait_txnotfull(); // spin until the transmitter is no longer reported full
+  r_data() = static_cast<uint8_t>(b);
+}
+
+char EarlyUART::recvb() { // blocking receive of one byte
+  _wait_rxnotempty(); // spin until the receiver reports data
+  return static_cast<char>(r_data());
+}
diff --git a/uart.hh b/uart.hh
index 1b3135f..12c64dd 100644
--- a/uart.hh
+++ b/uart.hh
@@ -4,6 +4,8 @@
 #include <cstdint>
 #include <memory>
 
+class OMAP35x_prcm;
+
 class ICharacterDevice {
 public:
   virtual void write(char const* data, int const& len) = 0;
@@ -14,7 +16,7 @@ class UART_impl;
 
 class UART : public ICharacterDevice {
 public:
-  UART(uintptr_t base, int irq);
+  UART(uintptr_t base, int irq, OMAP35x_prcm& prcm);
   ~UART();
 
   virtual void write(char const* data, int const& len);
@@ -25,4 +27,25 @@ private:
   std::unique_ptr<UART_impl> impl_;
 };
 
+class EarlyUART : public ICharacterDevice { // polling-only UART character device
+public:
+  EarlyUART() {}
+  ~EarlyUART() {}
+
+  virtual void write(char const* data, int const& len); // blocking send of 'len' bytes
+  virtual int read(char *buf, int const& len); // reads and echoes one character
+
+private:
+  void _wait_txnotfull();  // spin while the transmitter is reported full
+  void _wait_rxnotempty(); // spin until received data is reported
+  void sendb(char b);      // send a single byte
+  char recvb();            // receive a single byte
+  uint8_t volatile& r_data();
+  uint8_t volatile& r_lsr();
+  uint8_t volatile& r_ssr();
+
+  // Enumerator, not 'static const': _reg8() binds base by reference (odr-use)
+  enum : uintptr_t { base_ = 0xfffff000 };
+};
+
 #endif
diff --git a/util.hh b/util.hh
index 8bb04c6..3f5bf77 100644
--- a/util.hh
+++ b/util.hh
@@ -2,6 +2,7 @@
 #define _UTIL_HH_
 
 #include <cstdint>
+#include <cstddef>
 
 // Functions to access hardware registers. GCC will generate memory access instructions of the correct width.
 // Usage: "_reg8(base, ofs) = 0xf0;" to set
@@ -22,4 +23,21 @@ constexpr inline uint32_t volatile& _reg32(uintptr_t const& base, size_t const&
   return *reinterpret_cast<uint32_t*>(base+ofs);
 }
 
+// ceil(log2(n)): smallest e with (1u << e) >= n. Defined as 0 for n <= 1.
+inline unsigned _ln2(unsigned n) noexcept __attribute__((const));
+inline unsigned _ln2(unsigned n) noexcept {
+  if(n <= 1) // n == 0 would make clz yield 32 and wrap 31-reg below
+    return 0;
+  uint32_t reg;
+  asm ("clz %[dst], %[src]"
+       : [dst] "=r"(reg) : [src] "r"(n));
+  reg = 31-reg; // index of the highest set bit, i.e. floor(log2(n))
+  return (n & ~(1u<<reg)) ? reg+1 : reg; // round up unless n is a power of two
+}
+
+inline constexpr unsigned _pow2(unsigned n) noexcept __attribute__((const));
+inline constexpr unsigned _pow2(unsigned n) noexcept { // 2^n; n must be below the bit width of unsigned
+  return 1u<<n; // unsigned literal: 1<<31 would shift into the sign bit of int
+}
+
 #endif