From 75986ab9ae7e4b5b0e397559c16710ac014ecfa8 Mon Sep 17 00:00:00 2001 From: Nathan Wagner Date: Mon, 24 Oct 2016 20:33:50 -0500 Subject: [PATCH] physical and virtual memory functions --- mem.c | 886 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mem.h | 328 ++++++++++++++++++++++ 2 files changed, 1214 insertions(+) create mode 100644 mem.c create mode 100644 mem.h diff --git a/mem.c b/mem.c new file mode 100644 index 0000000..e8d2776 --- /dev/null +++ b/mem.c @@ -0,0 +1,886 @@ +#include "kernel.h" +#include "mem.h" + +#include "multiboot.h" +#include "spinlock.h" + +#include +#include +#include +#include + +/* + * pa = va + pb - vb + * va = pa + vb - pb + */ + +struct memory memory; +struct phys phys; + +__attribute__((aligned(4096))) struct pml4e kernel_pml4[512]; + +struct pml4e *kernel_pml4ptr; +uint64_t kernel_space; + +extern uintptr_t _kernel_end; +extern uintptr_t _kernel_phys_end; +extern uintptr_t _asm_physmap; + +#if 0 +static void pstackdump() { + printk("phys stack, %llu total pages, %llu top pages, stack = %llx, first free = %llx\n", + phys.free, + phys.stack->pages, + (uintptr_t)phys.stack, + phys.stack->page_addr[phys.stack->pages-1] + ); +} +#endif + +static void dump_pml4(paddr_t space) { + int i; + uint64_t *e = PHY2VIRTP(space); + +#if 0 + uint64_t *pa; + pa = (uint64_t *)(0x21000 + 384*8); + printk("*(%llx) should be 0x2A003, is %llx\n", pa, *pa); + pa = PHY2VIRTP(pa); + printk("*(%llx) should be 0x2A003, is %llx\n", pa, *pa); +#endif + + /* do 4 per line */ + printk("base = %llx, virt = %llx", space, (uintptr_t)e); + for (i = 0; i < 512; i ++) { + if (i % 32 == 0) { + printk("\n%2x:", e+i); + } + printk(" %llx", e[i]); + } + printk("\n"); +} + + +void test_address(void *a) { + uint64_t *high; + + high = a; + + *high = 0x1badd00dULL; + printk("testing 1baddood (0x%16llx) = 0x%llx\n", high,*high); + *high = 0xcafebabeULL; + printk("testing cafebabe (0x%16llx) = 0x%llx\n", high,*high); +} + +void pfree(paddr_t pa) { + struct pstack *old; + + if (!phys.stack) { + /* no stack? 
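+		 * (first call into pfree(), nothing to push onto yet)
+		 * bootstrap: the frame being freed becomes the first
+		 * struct pstack itself, reached through the physmap;
+		 * i.e.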
use freed page */ + phys.stack = PHY2VIRTP(pa); + printk("initializing phys.stack at %llx -> %llx\n", pa, phys.stack); + phys.stack->pages = 0; + phys.stack->next = 0; + } + + /* if the stack page is full, use the new free + * page for the next stack page + */ + if (phys.stack->pages == 510) { + old = phys.stack; + phys.stack = PHY2VIRTP(pa); + phys.stack->pages = 0; + phys.stack->next = old; + } + phys.stack->page_addr[phys.stack->pages++] = pa; + phys.free++; +} + +/* we never return the zero page, so we return 0 if there's a + * problem + */ +paddr_t palloc(void) { + paddr_t pa = 0; + + if (!phys.free) { + panic("palloc() failed\n"); + return 0; + } + + pa = phys.stack->page_addr[--phys.stack->pages]; + if (phys.stack->pages == 0) { + /* we're about to hand out the stack page itself, so + * fix the stack + */ + phys.stack = phys.stack->next; + } + phys.free--; + + /* clear the page here so callers don't have to */ + memset(PHY2VIRTP(pa), 0, 4096); + return pa; +} + +#define MEM_ALIGNP(addr, align) mem_align((uintptr_t)a, align) + +uintptr_t mem_align(uintptr_t a, size_t alignment) { +#if 0 + uintptr_t a = (uintptr_t) addr; + uintptr_t mask; +#endif + +#if 1 + if (a % alignment) { + a = a + alignment - a % alignment; + } +#else + a = (uintptr_t) addr; + mask = (alignment - 1) ^ (size_t) -1; + a = a & mask; + if (a != (uintptr_t) addr) { + a += alignment; + } +#endif + + return a; +} + +void *kvalloc(size_t bytes) { + size_t pages; + void *base; + + pages = bytes / 4096 + (bytes % 4096 > 0); + /* TODO check if not enough room */ + base = (void *)memory.kend; + memory.kend += pages * 4096; + return base; +} + +void *kpalloc(size_t bytes) { + size_t pages; + paddr_t base; + + pages = bytes / 4096 + (bytes % 4096 != 0); + /* TODO check if not enough room */ + base = mem_align(memory.kphysend, 4096); + memory.kphysend = base + pages * 4096; + return (void *)base; +} + +#define CHECK_FLAG(flags,bit) ((flags) & (1 << (bit))) +void phys_init(struct multiboot_info *mbi) { + struct multiboot_mmap_entry *mmap; + struct multiboot_mmap_entry *base; + + struct phys p = { 0 }; + struct memory m = { 0 }; + + phys = p; + memory = m; + + memory.kend = mem_align(_kernel_end, 4096); + memory.kphysend = _kernel_phys_end; + + /* TODO don't know if we need this now */ + phys.page_map = kvalloc(4096 * 2); /* this is where we will map in a page of memory to adjust */ + + /* phys.kmap needs to be the virtual address of the page table entry + * for mapping stack + * so, phys.kmap -> vadecode(&phys.stack) + */ + + if (!CHECK_FLAG(mbi->flags, 6)) { + panic("no memory map available\n"); + } + + +#if 0 + printk("cr3 = %llx\n", cr3); + printk("new cr3 = %llx\n", (uintptr_t)&kernel_pml4 - _kernel_vma); + cr3 = getcr3(); + printk("cr3 = %llx\n", cr3); +#endif + //memset((void *)0x21000, 0, 4096); + + /* loop over the multiboot info, free pages */ + + /* TODO refactor this into a function to get max physical address */ + base = (struct multiboot_mmap_entry *) (uint64_t) mbi->mmap_addr; + + memory.physmap = _asm_physmap; /* where the early boot initialized the physmap */ + + memory.phys_max = 0; + for (mmap = base; (unsigned long) mmap < mbi->mmap_addr + mbi->mmap_length; + mmap = (struct multiboot_mmap_entry *) ((unsigned long) mmap + mmap->size + sizeof (mmap->size))) { + if (mmap->type == 1 && mmap->addr + mmap->len > memory.phys_max) { + memory.phys_max = mmap->addr + mmap->len; + } + } + + printk("phys_max = %llx, physmap = %llx, %llx 1GB pages\n", memory.phys_max, + memory.physmap, + memory.phys_max / GB(1) + 
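+		/* TODO precedence: '+' binds tighter than '!=', so this
+		 * argument evaluates to 0 or 1, not a rounded-up count;
+		 * a ceiling count would be (memory.phys_max + GB(1) - 1) / GB(1)
+		 */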
(memory.phys_max % GB(1)) != 0); + + /* set up kernel physical memory map */ + /* need one for each 512 GB */ + /* for now, assume that these are available after the kernel physical + * memory, and below 4 MB, which is still identity mapped + * TODO hardcode the first one into the kernel, map the lowest + * 512 GB there, then use that to get more if needed + * also mark all of these as "global" so they don't get flushed at space change cr3 + */ + + /* + * get free memory and add it to the free page stack + * free memory is generally anything above memory.kphysend + * we won't need to call kpalloc after this point, we can call palloc + * for new physical pages + * we may need to figure out how to get mapping for DMA pages. + * I would really prefer to use 2 MB pages for everything, but the + * system really doesn't like that + * And in any case, I saw some math on usenet that implies that the + * average wasted space would be greater with 2 MB pages than 4KB pages + extra + * paging tables + */ + for (mmap = base; (unsigned long) mmap < mbi->mmap_addr + mbi->mmap_length; + mmap = (struct multiboot_mmap_entry *) ((unsigned long) mmap + mmap->size + sizeof (mmap->size))) { +#if 0 + printk(" size = 0x%x, base_addr = 0x%llx," " length = 0x%llx, type = 0x%x\n", + mmap->size, mmap->addr, mmap->len, mmap->type); +#endif + printk(" addr = 0x%18llx," " length = 0x%llx, type = 0x%x\n", mmap->addr, mmap->len, mmap->type); + + if (mmap->type == 1) { + uintptr_t start, end; + + start = mmap->addr; + end = start + mmap->len; + + /* TODO we don't map in anything below 16 MB so we can + * use it for PCI and DMA */ + if (start < MB(16)) { + start = MB(16); + } + start = mem_align(start, 4096); + + if (start < end - 4096) { + printk(" freeing %llu pages, %llu KB, starting from 0x%llx\n", (end - start)/4096, + (end-start)/1024, start); + } + + while (start <= end - 4096) { + /* TODO don't free pages used by modules/initrd */ + pfree(start); + start += 4096; + } + } + } + + /* copy the PML4 somewhere in kernel memory */ + /* need to know the physical address */ + MEM_KERNEL = (getcr3() & ~(uintptr_t)0xfff); +#if 0 + dump_pml4(newp); +#endif + MEM_KERNEL = create_addrspace(); + switch_address_space(MEM_KERNEL); + /* TODO can free the top level pml4. We really should copy all of the + * initial blocks into kernel memory, as it is, this didn't really get us + * much + */ +} + +paddr_t create_addrspace(void) { + paddr_t pa; + struct pml4t *space; + int i; + + pa = palloc(); /* will zero the memory */ + space = PHY2VIRTP(pa); + /* higher half copied into every address space */ + for (i=128;i<512;i++) { + space->pml4e[i] = KERNEL_PML4->pml4e[i]; + } + printk("created address space %llx\n", pa); + return pa; +} + +#define PML4i(vaddr) ( (((vaddt_t) vaddr) >> 39) & 0x1ff ) +#define PDPi(vaddr) ( (((vaddt_t) vaddr) >> 30) & 0x1ff ) +#define PDi(vaddr) ( (((vaddt_t) vaddr) >> 21) & 0x1ff ) +#define PTi(vaddr) ( (((vaddt_t) vaddr) >> 12) & 0x1ff ) + +#if 0 +paddr_t getphysaddr(uint64_t space, vaddr_t vaddr) { + struct vaddr_decode ds; + decode_vaddr(&ds, (struct pml4 *)space, vaddr); + +} +#endif + +/* TODO this can probably be done inline, but this makes it a bit easier to + * think about + * should this return an integer or something to indicate if it was a complete + * mapping? 
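+ *
+ * usage sketch (illustrative only; the offset values assume the
+ * kernel base 0xFFFFFFFF80000000 described in mem.h):
+ *
+ *   struct vaddr_decode ds;
+ *   decode_vaddr(&ds, MEM_KERNEL, 0xFFFFFFFF80000000ULL);
+ *   // ds.pml4offset == 511, ds.pdpoffset == 510,
+ *   // ds.pdoffset == 0, ds.ptoffset == 0
+ *   if (ds.present)
+ *       printk("maps to %llx\n", ds.page + ds.offset);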
+ */ +#define VADDRMASK 0xfffffffffffff000 +void decode_vaddr(struct vaddr_decode *ds, uint64_t space, vaddr_t vaddr) { + ds->pml4e = (struct pml4e){ 0 }; + ds->pdpe = (struct pdpe){ 0 }; + ds->pde = (struct pde){ 0 }; + ds->pte = (struct pte){ 0 }; + ds->page = 0; +#if 0 + /* broken down virtual address */ + int pml4offset, pdpoffset, pdoffset, ptoffset; + paddr_t offset; /* physical page offset */ +#endif + ds->pml4offset = (vaddr >> 39) & 0x1ff; /* index into pagemap level 4 table */ + ds->pdpoffset = (vaddr >> 30) & 0x1ff; /* index into */ + ds->pdoffset = (vaddr >> 21) & 0x1ff; + ds->ptoffset = (vaddr >> 12) & 0x1ff; + ds->offset = vaddr & 0xfff; + + ds->present = 0; /* assume not present */ + /* offset = vaddr & 0xfff; i.e. low 12 bits */ + +#if 0 + struct pml4 *pml4; /* pml4 base address */ + paddr_t pml4_phys; /* pml4 physical base address */ +#endif + ds->pml4_phys = space & VADDRMASK; + ds->pml4t = PHY2VIRTP(ds->pml4_phys); + +#if 0 + printk("space = %llx ds->pml4t = %llx ", space, ds->pml4t); + struct pml4e pml4e; /* entry in page map level 4 */ + /* at pml4 + 8 * pml4offset */ + printk(" pml4offset = %llx\n", ds->pml4offset); +#endif + ds->pml4e = ds->pml4t->pml4e[ds->pml4offset]; + ds->level = 1; + +#if 0 + struct pdpe pdpe; /* entry in page directory pointer table */ + /* at virt(pml4e.addr) + 8 * pdpoffset */ + /* does not exist unless pml4e.present */ + /* pdpe.ps == 1 means 1 GB pages, and the next level doesn't exist */ + /* offset = vaddr & 0x3fffffff, physical page = standard - offset */ +#endif + if (!ds->pml4e.present) { + return; + } + + ds->pdpt_phys = ds->pml4e.addr << 12; + ds->pdpe = ((struct pdpt *)PHY2VIRTP(ds->pdpt_phys))->pdpe[ds->pdpoffset]; + ds->level = 2; + if (ds->pdpe.ps == 1) { + paddr_t mask = GB(1)-1; + /* 1 GB pages */ + ds->pagesize = GB(1); + ds->offset = vaddr & mask; + ds->present = ds->pdpe.present; + ds->page = ds->pdpe.addr << 12; + return; + } + + +#if 0 + struct pde pde; /* entry in page directory table */ + /* does not exist unless pdpe.present */ + /* at virt(pdpe.addr) + 8 * pdoffset */ + /* pde.ps == 0 means 4 KB pages and the next level exists if present */ + /* pde.ps == 1 means 2 MB pages, pts doesn't exist and + * offset = vaddr & 0x1fffff; i.e. low 21 bits + * physical page address = same mask minus offset bits + */ +#endif + if (!ds->pdpe.present) { + return; + } + ds->pdt_phys = ds->pdpe.addr << 12; + ds->pde = ((struct pdt *)PHY2VIRTP(ds->pdt_phys))->pde[ds->pdoffset]; + ds->level = 3; + if (ds->pde.ps == 1) { + paddr_t mask = MB(2)-1; + /* 2 MB pages */ + ds->offset = vaddr & mask; + ds->page = ds->pde.addr << 12; + ds->present = ds->pdpe.present; + return; + } + +#if 0 + struct pte pte; /* entry in page table */ + /* does not exist unless pde.present */ + /* at virt(pde.addr) + 8 * ptoffset */ + /* offset = vaddr & 0xfff; i.e. 
low 12 bits */ + /* physical page address = pde.addr, or vaddr & 0xffffffffff000 */ +#endif + if (!ds->pde.present) { + return; + } + ds->pt_phys = ds->pde.addr << 12; + ds->pte = ((struct pt *)PHY2VIRTP(ds->pt_phys))->pte[ds->ptoffset]; + ds->level = 4; + ds->page = ds->pte.addr << 12; + /* offset is good already */ + ds->present = ds->pte.present; + + return; +#if 0 + /* actual physical addresses */ + paddr_t pml4_phys; + paddr_t pdpt_phys; + paddr_t pdt_phys; + paddr_t pt_phys; + + int present; /* physical address actually mapped */ + paddr_t paddr; /* decoded physical address */ +#endif + +} + +#if 1 +void print_decode(struct vaddr_decode *ds, uint64_t space, vaddr_t vaddr) { + struct vaddr_decode d; + if (!ds) { + ds = &d; + } + decode_vaddr(ds, space, vaddr); + printk("%llx %llx %u offsets %x %x %x %x,\n tables %llx %llx %llx", + space, vaddr, ds->level, + ds->pml4offset, ds->pdpoffset, ds->pdoffset, ds->ptoffset, + ds->pdpt_phys, ds->pdt_phys, ds->pt_phys + ); + if (ds->present) { + printk(" page %llx", ds->page); + } + printk("\n"); +} +#endif + +#define PT_PAMASK 0x000FFFFFFFFFF000 + +/* could page fault it */ +/* map a physical page to a virtual address */ +void *map_page(uint64_t space, vaddr_t addr, paddr_t pa, unsigned int flags) { + struct vaddr_decode ds; + int trace = 0; + + trace = flags & MAP_TRACE; + + //if (space == 0) space = kernel_space; + if (!space) { + panic("attempted to map into a null space\n"); + } + + /* if addr == 0 then caller doesn't care where it's mapped */ + if (addr == 0) { + addr = (vaddr_t)kvalloc(4096); + } + if (trace) { + printk("map_page space = %llx vaddr %llx paddr %llx\n", space, addr, pa); + } + /* break up the vaddr and get pointers to tables */ + decode_vaddr(&ds, (uint64_t)space, addr); + + //printk("map page\n"); + flags &= 0xC1C; /* only accept the user, rw, and available bits */ + /* now also accepting PCD and PWT bits */ + if (pa) { + flags |= PT_PRESENT; + } + +#define PML4E ( *(uint64_t *)&(ds.pml4t->pml4e[ds.pml4offset])) + /* do we need to make a pdp */ + if (!ds.pml4e.present) { + /* need a new pdpt for this address */ + paddr_t page_table; + + if (trace) { + printk("new pdpt"); + } + page_table = palloc(); + ds.pdpt_phys = page_table; + if (trace) { + printk(" at paddr %llx", page_table); + } + //ds.pdp = PHY2VIRTP(page_table); + + //*(uint64_t *)&ds.pml4t->pml4e[ds.pml4offset] = (page_table | flags); + + PML4E = (page_table | flags); + } + if (trace) { + printk(" ds.pml4t->pml4e[%x] = %llx\n", ds.pml4offset, PML4E); + } +#undef PML4E + +#define PDPE ( *(uint64_t *)&((struct pdpt *)PHY2VIRTP(ds.pdpt_phys))->pdpe[ds.pdpoffset] ) + if (!ds.pdpe.present) { + /* need a new pdt for this address */ + paddr_t page_table; + + if (trace) { + printk("new pdt"); + } + page_table = palloc(); + ds.pdt_phys = page_table; + if (trace) { + printk(" at paddr %llx", page_table); + } + //ds.pd = PHY2VIRTP(page_table); + + PDPE = (page_table | flags); + } + if (trace) { + printk(" ds.pdpt->pdpe[%x] = %x\n", ds.pdpoffset, PDPE); + } +#undef PDPE + +#define PDE ( *(uint64_t *)&((struct pdt *)PHY2VIRTP(ds.pdt_phys))->pde[ds.pdoffset] ) + if (!ds.pde.present) { + /* need a new pt for this address */ + paddr_t page_table; + if (trace) { + printk("new pt"); + } + + page_table = palloc(); + ds.pt_phys = page_table; + if (trace) { + printk(" at paddr %llx", page_table); + } + //ds.pt = PHY2VIRTP(page_table); + + PDE = (page_table | flags); + } + if (trace) { + printk(" ds.pd->pde[%x] = %x\n", ds.pdoffset, PDE); + } +#undef PDE + + /* TODO check if the page 
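+	 * being replaced (read the old PTE and pfree() its frame, or at
+	 * least warn, before overwriting it), i.e. whether a mapping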
is present in the table already, if it is, we may be losing a + * physical page + */ +#define PTE ( *(uint64_t *)&((struct pt *)PHY2VIRTP(ds.pt_phys))->pte[ds.ptoffset] ) + if (pa) { + //printk("mapping addr %llx to %llx\n", (uintptr_t)addr, (uintptr_t)(pa | flags )); + if (trace) { + printk(" ds.pt->pte[%x] = %x\n", ds.ptoffset, pa | flags); + } + PTE = (pa | flags); + //*(uint64_t *)&((struct pt *)PHY2VIRTP(ds.pt_phys))->pte[ds.ptoffset] = (pa | flags); + flush_tlb((void *)addr); + } else { + /* a not present page, will need to be faulted in */ + printk("warning: mapping a non present page\n"); + PTE = (pa | (flags & 0xffe)); + flush_tlb((void *)addr); + } +#undef PTE + if (trace) { + print_decode(&ds, (uint64_t)space, addr); + } + + return (void *)addr; +} + +paddr_t unmap_page(uint64_t space, vaddr_t addr) { + struct vaddr_decode ds; + + /* break up the vaddr and get pointers to tables */ + decode_vaddr(&ds, space, addr); + if (!ds.present) { + /* not mapped */ + return 0; + } + /* TODO pteptr not set by decode */ + ds.pteptr->present = 0; + flush_tlb((void *)addr); + return ds.page; +} + +void vunmap(uint64_t space, vaddr_t addr, paddr_t pa, size_t length) { + + if (!space) { + panic("attempted to unmap in a null space\n"); + } + + while (length >= 4096) { + /* freeing these is the responsibility of the caller. + * they might be shared, so we don't/can't do that here + */ + unmap_page(space, addr); + addr += 4096; + pa += 4096; + length -= 4096; + } +} + +/* what about non-aligned? */ +/* assume that the source is already mapped in */ +/* destination space, dest start address (in the destination space), + * from start address in the current space, number of bytes to copy + * this function really only works in kernel space, because it relies + * on the physical memory being mapped in. + * we could, in principle, have a 4KB virtual address space and + * map the pages in as needed + */ +void copy_to_space(uint64_t dspace, void *to, void *from, size_t length) { + vaddr_t dst, src; /* to and from */ + //paddr_t dpage, spage; /* to and from physical */ + size_t dfrag = 0, sfrag = 0; /* fragment size of non-aligned */ + + dst = (vaddr_t) to; + src = (vaddr_t) from; + + while (length) { + size_t n; /* number of bytes to copy */ + /* get locally mapped virtual addresses of physical pages */ + //to = PHY2VIRTP(getphysaddr(dspace, dst)); + sfrag = 4096 - (src & 0x1FFF); /* bytes to bring src up to page boundary */ + dfrag = 4096 - (dst & 0x1FFF); /* bytes to bring dest up to page boundary */ + + if (sfrag < dfrag) { + n = sfrag < length ? sfrag : length; + } else { + n = dfrag < length ? 
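+			/* TODO the 0x1FFF masks above should be 0xFFF for
+			 * 4 KB pages, and 'to' is never translated into
+			 * dspace here (the getphysaddr() lookup is commented
+			 * out), so this only copies within the current space
+			 */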
dfrag : length; + } + /* copy up to length/fragment from source bytes */ + memmove((void *)dst, (void *)src, n); + length -= n; + src += n; + dst += n; + } +} + +#define PML4E(space, addr) ((struct pdp *)(((struct pml4 *)space)->pml4e[PML4i(addr)])) +void vmap(uint64_t space, vaddr_t addr, paddr_t pa, size_t length, unsigned int flags) { + + if (!space) { + panic("attempted to map into a null space\n"); + } + + while (length >= 4096) { + map_page(space, addr, pa, flags); + addr += 4096; + pa += 4096; + length -= 4096; + } +} + +void vmapz(uint64_t space, vaddr_t addr, size_t length, unsigned int flags) { + paddr_t pa; + + if (!space) { + panic("attempted to map into a null space\n"); + } + + //printk("mapping %zx bytes at %llx\n", length, addr); + while (length > 0) { + pa = palloc(); + map_page(space, addr, pa, flags); + addr += 4096; + length -= 4096; + } + //printk("mapped\n"); +} + +/* TODO should probably specify pages rather than bytes */ +void *kalloc(size_t bytes) { + void *va = kvalloc(bytes); + vmapz(MEM_KERNEL, (vaddr_t)va, bytes, MEM_RW); + return va; +} + +/* TODO we can free the space, but we really can't free the virtual address */ +/* so we can keep a list of free virtual addresses in the kernel space + * for kalloc to reuse... + */ +void kfree(void *va) { + unmap_page(MEM_KERNEL, (vaddr_t)va); + return; +} + +void vmem_test() { + void *kvtest; + paddr_t pa; + /* test an allocation */ + printk("starting vmem test\n"); + kvtest = kvalloc(4096); + printk("allocated virtual block at %llx\n", kvtest); + pa = palloc(); + printk("allocated physical block at %llx\n", pa); + vmap(MEM_KERNEL, (vaddr_t) kvtest, pa, 4096, MAP_TRACE); + test_address(kvtest); + vunmap(MEM_KERNEL, (vaddr_t) kvtest, pa, 4096); + + vmap(MEM_KERNEL, (vaddr_t) GB(32), pa, 4096, MAP_TRACE); + test_address((void *)GB(32)); + vunmap(MEM_KERNEL, (vaddr_t) GB(32), pa, 4096); + printk("ending vmem test\n"); +#if 0 + /* should page fault */ + test_address((void *)GB(32)); +#endif +} + +#define NUMOBJSTACKS 10 +/* + * carefully constructed to take 16 bytes, I hope + * This isn't really a requirement, but since + * they'll be allocated out of kernel objects + * anyway, they will be rounded up to a power of two + */ +struct kobjstack { + void *top; /* pointer to first free object */ + uint32_t freeobjs; + uint32_t size; /* for allocating objects of a given size */ + struct spinlock_t spinlock; +}; + +struct kobjstack kobjstacks[NUMOBJSTACKS]; + +void vmem_init() { + int i; + + printk("initializing virtual memory\n"); + /* rebuild the boot page tables */ + + /* build the kernel object allocator */ + for (i=0; i < NUMOBJSTACKS; i++) { + kobjstacks[i].top = 0; + kobjstacks[i].freeobjs = 0; + kobjstacks[i].size = 1 << (i+3); + spinlock_init(&kobjstacks[i].spinlock); + } +} + +void dumpostacks(int f, int t) { + int i; + + if (f < 0) f = 0; + if (t >= NUMOBJSTACKS) t = NUMOBJSTACKS-1; + + for (i=f; i <= t; i++) { + /* TODO spinlock the stack for the print */ + printk("stack %4u: %u free, top %llx\n", + kobjstacks[i].size, + kobjstacks[i].freeobjs, + kobjstacks[i].top + ); + } +} + +void mem_init(struct multiboot_info *mbi) { + phys_init(mbi); + /* physical page allocator is working now */ + + vmem_init(); +} + +/* + * kernel objects, grouped by size power of two + * minimum size is a pointer (i.e. 
8 bytes), + * maximum size is a page, i.e 4096 bytes + * when free, each object points to the next free object + * each stack size allocator is itself a kernel object + */ + +static size_t nextpowerof2(size_t x) { + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x |= x >> 32; + x++; + + return x; +} + +void *koalloc(size_t size) { + int stack_index = 0; + struct kobjstack *ostack; + void *obj; + + /* Assembly: bsr rax, rdi; bsr rbx, rdi; cmp rax, rbx; je .done; shl rax, 1; .done ret; */ + //printk("allocating %u", size); + size = nextpowerof2(size); + //printk(" using size %u", size); + size >>= 4; + while (size) { + stack_index++; + size >>= 1; + } + //printk(" from stack %u\n", stack_index); + //dumpostacks(stack_index,stack_index); + + ostack = &kobjstacks[stack_index]; + + if (!ostack) { + panic("no kernel object stack for size %u\n", size); + } + + //printk("locking stack\n"); + spinlock_acquire(&ostack->spinlock); + //printk("got lock\n"); + + if (!ostack->freeobjs) { + void *newpage; + uintptr_t free; + newpage = kalloc(4096); + /* TODO should probably map this in to high memory */ + for (free = (uintptr_t)newpage; free < (uintptr_t)newpage + 4096; free += ostack->size) { + *(uintptr_t *)free = free + ostack->size; + } + ostack->top = newpage; + ostack->freeobjs = 4096/ostack->size; + //printk("ostack init stack %4u, %u free\n", size, ostack->freeobjs); + } + + obj = ostack->top; + ostack->top = (void *)*(uintptr_t *)obj; + ostack->freeobjs--; + + spinlock_release(&ostack->spinlock); + //printk("ostack alloc stack %4u, %u free, top %llx, obj %llx\n", ostack->size, ostack->freeobjs, ostack->top, obj); + //dumpostacks(stack_index,stack_index); + + return obj; +} + +/* it might be worth an idle thread doing a qsort and see + * if we can release any memory if there's a least + * a page worth of free objects + */ +void kofree(void *obj, size_t size) { + int stack_index = 0; + struct kobjstack *ostack; + + /* Assembly: bsr rax, rdi; bsr rbx, rdi; cmp rax, rbx; je .done; shl rax, 1; .done ret; */ + //printk("freeing %u", size); + size = nextpowerof2(size); + //printk(" using size %u", size); + size >>= 4; + while (size) { + stack_index++; + size >>= 1; + } + //printk(" from stack %u\n", stack_index); + //dumpostacks(stack_index,stack_index); + + ostack = &kobjstacks[stack_index]; + + spinlock_acquire(&ostack->spinlock); + + *(vaddr_t *)obj = (vaddr_t)ostack->top; + ostack->top = obj; + ostack->freeobjs++; + spinlock_release(&ostack->spinlock); + //dumpostacks(stack_index,stack_index); + printk("ostack free stack %4u, %u free, top %llx, obj %llx\n", ostack->size, ostack->freeobjs, ostack->top, obj); +} diff --git a/mem.h b/mem.h new file mode 100644 index 0000000..9d56304 --- /dev/null +++ b/mem.h @@ -0,0 +1,328 @@ +#ifndef MEM_H_ +#define MEM_H_ 1 + +#include +#include + +#include "multiboot.h" + +typedef uintptr_t paddr_t; +typedef uintptr_t vaddr_t; + +extern uintptr_t _kernel_end; +extern uintptr_t _kernel_vma; +extern uintptr_t _kernel_phys_end; +extern uintptr_t _kernel_size; +extern uintptr_t _asm_physmap; + +extern void *bios_ebda; + +/* These are in memx64.s */ +extern void flush_tlb(void *va); +extern void switch_address_space(uintptr_t cr3); + +void vmem_test(); + +/* map one physical page to a virtual address */ +#define PHY2VIRTP(pa) ( (void *)(((paddr_t)pa) + memory.physmap) ) +/* only works for the linear mapped physical memory */ +#define VIRTP2PHY(va) ( (paddr_t)(((vaddr_t)(va)) - memory.physmap) ) +#define KB(x) ((x) * (1<<10)) +#define 
MB(x) ((x) * (1<<20)) +#define GB(x) (((uint64_t)x) * (1ULL<<30)) +#define TB(x) (((uint64_t)x) * (1ULL<<40)) +uintptr_t mem_align(uintptr_t a, size_t alignment); + +/* + * kernel is at 0xFFFFFFFF80000000 and up + * we will map in all physical memory below that + */ + +/* structure of physical frames */ +struct frame { + struct frame *next; + void *paddr; /* the physical address of the frame */ + struct frame *prev; + void *top, *base; + uint64_t magic; +}; + +/* a map of memory */ +/* TODO fix these types. maybe add a physvend */ +struct memory { + void *kbase; /* at 0xFFFFFFFF00000000 */ + void *kstack; /* grow down from -1 ? */ + uintptr_t kend; /* virtual address of top of kernel */ + uintptr_t kphysend; /* physical address of top of kernel */ + uintptr_t phys_max; /* maximum (usable) physical address + 1 */ + uintptr_t physmap; /* all physical memory is mapped starting here */ + /* kbase + kernel size ?, then put dynamic kernel structures above that, + * growing toward the stack? + */ +}; + +/* TODO keep track of the kernel virtual address of page, we could + * re-alloc it in the same spot + */ +struct pstack { + paddr_t page_addr[510]; /* physical addresses of frames */ + size_t pages; /* free pages in this stack frame */ + struct pstack *next; /* address of next stack frame */ +}; + +/* + * free a page: + * if there is no room in the stack, then + * set the phys.stack.next to the new page + * map the new page in at phys.stack + * then add to phys.stack.pages + * the physical address to the stack.pages + * + * get a page: + * if there are no pages free on the stack: + * map in the phys.stack.next to phys.stack + * hand out the old page, which is on pages[free] + * + * it's probably worth memoizing the virtual address + * of the exact place where the stack vm map page is stored + * in the page tables + */ +extern struct memory memory; + +/* physical memory manager */ +struct phys { + struct frame *base; + struct frame *top; + size_t frame_size; + int64_t offset; /* paddr = vaddr + offset */ + struct phys *next; + + vaddr_t kmap; /* physical address of page table entry for *stack */ + vaddr_t *page_map; /* temporary page map space */ + struct pstack *stack; /* virtual address of top of page stack */ + size_t free; /* total number of free pages */ + +}; +/* + * pa = va + pb - vb + * va = pa + vb - pb + */ + +extern struct memory memory; +extern struct phys phys; + +/* + * ops: + * add - add a contiguous chunk of physical memory and a virtual address base. + * alloc - get a page of memory + * free - free a page of memory + * tinyalloc - get a really small page, 256 bytes + * tinyfree - + */ + +/* TODO lazy setup of the memory block stack */ +/* TODO modify phys_init to be "more core" to support + * discontiguous memory addresses + */ + +/* These don't have to be the same as the hardware paging, but + * it probably helps + */ +/* Hmm, need to map in memory for this to work */ +/* + * pa = va + pb - vb + * va = pa + vb - pb + */ + +uint64_t getcr3(); /* probably in assembly */ + +paddr_t create_addrspace(void); + + +void test_address(void *); +void phys_init(struct multiboot_info *mbi); +void mem_init(); +paddr_t palloc(); +void pfree(paddr_t vmem); +void *koalloc(size_t size); +void kofree(void *, size_t size); +void dumpostacks(int f, int t); + +uint64_t makecr3(void *pml4) ; +void *virt2phys(void *vaddr) ; + +extern uint64_t kernel_space; /* physical address of kernel space? 
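+ * yes: the physical address of the kernel's PML4 page, set up in
+ * phys_init() via create_addrspace()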
*/ +#define MEM_KERNEL (kernel_space) +#define KERNEL_PML4 ((struct pml4t *)PHY2VIRTP(kernel_space)) + +extern paddr_t pci_start; + +struct cr3 { + uint64_t reslow:3; /* should be cleared to zero */ + uint64_t pwt:1; /* page writethrough */ + uint64_t pcd:1; /* page cache disable */ + uint64_t resmed:7; /* clear to zero */ + uint64_t addr:40; /* pml4 page base physical address shifted right 12 */ + uint64_t reshigh:12; /* clear to zero */ +}; + +/* TODO last bit (63) is actually a no-execute bit */ +struct pml4e { + uint64_t present:1; + uint64_t rw:1; + uint64_t user:1; + uint64_t pwt:1; + uint64_t pcd:1; + uint64_t a:1; + uint64_t ign:1; + uint64_t mbz:2; + uint64_t avl:3; + uint64_t addr:40; /* pml4 page base physical address shifted right 12 */ + uint64_t reshigh:12; /* clear to zero */ +}; + +struct pdpe { + uint64_t present:1; + uint64_t rw:1; + uint64_t user:1; + uint64_t pwt:1; + uint64_t pcd:1; + uint64_t a:1; + uint64_t ign:1; + uint64_t ps:1; /* I think this is the PS bit: 0 for 4KB or 2MB, 1 for 1GB */ + uint64_t mbz:1; + uint64_t avl:3; + uint64_t addr:40; /* pml4 page base physical address shifted right 12 */ + uint64_t reshigh:12; /* clear to zero */ +}; + +struct pde { + uint64_t present:1; + uint64_t rw:1; + uint64_t user:1; + uint64_t pwt:1; + uint64_t pcd:1; + uint64_t a:1; + uint64_t ign:1; + uint64_t ps:1; /* I think this is the PS bit: 0 for 4KB */ + uint64_t mbz:1; + uint64_t avl:3; + uint64_t addr:40; /* pml4 page base physical address shifted right 12 */ + uint64_t reshigh:12; /* clear to zero */ +}; + +struct pte { + uint64_t present:1; + uint64_t rw:1; + uint64_t user:1; + uint64_t pwt:1; + uint64_t pcd:1; + uint64_t a:1; + uint64_t d:1; + uint64_t pat:1; + uint64_t g:1; + uint64_t avl:3; + uint64_t addr:40; /* pml4 page base physical address shifted right 12 */ + uint64_t reshigh:12; /* clear to zero */ +}; + +struct pml4t { + struct pml4e pml4e[512]; +}; + +struct pdpt { + struct pdpe pdpe[512]; +}; + +struct pdt { + struct pde pde[512]; +}; + +struct pt { + struct pte pte[512]; +}; + +struct vaddr_decode { + /* broken down virtual address */ + int pml4offset, pdpoffset, pdoffset, ptoffset; + paddr_t offset; /* physical page offset */ + + struct pml4t *pml4t; /* pml4 base address */ + paddr_t pml4_phys; /* pml4 physical base address */ + + struct pml4e pml4e; /* entry in page map level 4 */ + /* at pml4 + 8 * pml4offset */ + + struct pdpe pdpe; /* entry in page directory pointer table */ + /* at virt(pml4e.addr) + 8 * pdpoffset */ + /* does not exist unless pml4e.present */ + /* pdpe.ps == 1 means 1 GB pages, and the next level doesn't exist */ + /* offset = vaddr & 0x3fffffff, physical page = standard - offset */ + + struct pde pde; /* entry in page directory table */ + /* at virt(pdpe.addr) + 8 * pdoffset */ + /* does not exist unless pdpe.present */ + /* pde.ps == 0 means 4 KB pages and the next level exists if present */ + /* pds.ps == 1 means 2 MB pages, pts doesn't exist and + * offset = vaddr & 0x1fffff; i.e. low 21 bits + * physical page address = same mask minus offset bits + */ + + struct pte pte; /* entry in page table */ + /* at virt(pde.addr) + 8 * ptoffset */ + /* does not exist unless pde.present */ + /* offset = vaddr & 0xfff; i.e. 
low 12 bits */ + /* physical page address = pde.addr, or vaddr & 0xffffffffff000 */ + + struct pte *pteptr; /* a pointer to the actual entry */ + + /* actual physical addresses */ + paddr_t pdpt_phys; + paddr_t pdt_phys; + paddr_t pt_phys; + + int present; /* physical address actually mapped */ + paddr_t paddr; /* decoded physical address */ + int level; + paddr_t page; /* physical address of page */ + size_t pagesize; +}; + +void decode_vaddr(struct vaddr_decode *ds, uint64_t space, vaddr_t vaddr); +void print_decode(struct vaddr_decode *ds, uint64_t space, vaddr_t vaddr); + +/* returns addr. addr may be null in which case the function will allocate virtual space */ +void *map_page(uint64_t space, vaddr_t addr, paddr_t pa, unsigned int flags); +void vmapz(uint64_t space, vaddr_t addr, size_t length, unsigned int flags); + +#define PT_PRESENT 0x1 +#define MEM_PCD (1<<4) +#define MEM_PWT (1<<3) +#define MEM_USER (1<<2) +#define MEM_RW (1<<1) +#define MAP_TRACE (1<<16) + +#define MEM_NOCACHE (MEM_PCD | MEM_PWT) +#define MEM_MMIO (PT_PRESENT|MEM_NOCACHE|MEM_RW) +#define MEM_USERSPACE (MEM_USER | MEM_RW) + +#if 0 +typedef struct page { + u32int present : 1; // Page present in memory + u32int rw : 1; // Read-only if clear, readwrite if set + u32int user : 1; // Supervisor level only if clear + u32int accessed : 1; // Has the page been accessed since last refresh? + u32int dirty : 1; // Has the page been written to since last refresh? + u32int unused : 7; // Amalgamation of unused and reserved bits + u32int frame : 20; // Frame address (shifted right 12 bits) +} page_t; + +#endif + +struct bios_data_area { + uint16_t comaddr[4]; + uint16_t lpt[3]; + uint16_t ebda; /* <<4 to get actual */ +}; + +#endif -- 2.40.0