%macro zeropage32 1 mov edi, %1 xor eax, eax mov ecx, 1024 ; 1024 times for 4 byte stosd rep stosd %endmacro USE32 section .multiboot global multiboot_magic:data global multiboot_infoptr:data global _multiboot_entry:function _multiboot_entry: jmp start32 ; multiboot header needs to be 4 byte aligned. ; it would be anyway I think, but make it explicit align 4 multiboot_magic: dd 0x1BADB002 ; multiboot 1 magic dd 0x00000007 ; flags: 4 KB aligned modules, memory and video informatino dd -(0x1BADB002 + 0x00000007) ; checksum multiboot_infoptr: dd 0 ; a place to store the multiboot info pointer until the kernel gets it section .text extern kernel_bootstrap PML4ADDR equ 0x10000 align 4 start32: ; boot loader passes multiboot info in ebx and eax mov [multiboot_infoptr], ebx mov [multiboot_magic], eax ; set up initial page tables ; We could also just use 1GB pages for now and let the ; actual memory manager handle it later, we just need ; these tables set up so that we can run the kernel ; in the higher half. ; free memory at this point is pretty much everything normally free ; below 1 MB ; 0x1000 - 0x7ffff is all free, plus 0x80000 - bottom of EBDA ; map the first four megabytes of physical memory twice ; once to identity map it, once to map it into the high kernel ; virtual memory area for a higher half kernel ; 0x10000 PML4 (512 GB/entry) ; 0x11000 PDPT identity map (1 GB/entry) ; 0x12000 PDT identity map (2 MB/entry) ; 0x13000 PT identity map 0-2 MB (4 KB/entry) ; 0x14000 PT identity map 2-4 MB ; 0x15000 PDPT kernel map for higher half (1 GB/entry) ; 0x16000 PDT kernel map for higher half (2 MB/entry) ; 0x17000 PT kernel map 0-2 MB ; 0x18000 PT kernel map 2-4 MB ; 0x19000 1GB PDPT physical low 512 GB map ; 0x20000-0x2ffff 64KB initial stack ; zero the initial stack mov edi, 0x20000 xor eax, eax mov ecx, PML4ADDR rep stosd mov esp, 0x30000 ; set to top of stack ; The 3 at the end of these marks the page as present and read/write ; PML4 zeropage32 PML4ADDR mov edi, PML4ADDR ; address of initial PML4 mov [edi], dword 0x11003 ; first entry, covers virtual 0-512 GB ; higher half map mov [edi + 511 * 8], dword 0x15003 ; last entry, covers upper virtual 512 GB ; identity map PDPT zeropage32 0x11000 mov edi, 0x11000 mov [edi], dword 0x12003 ; virtual 0-1 GB ; higher half PDPT zeropage32 0x15000 mov edi, 0x15000 ; 0xFFFFFFFF80000000 kernel is at here + 1 MB mov [edi + 510 * 8], dword 0x16003 ; virtual -2 - -1 GB ; page directory zeropage32 0x12000 mov edi, 0x12000 mov [edi], dword 0x13003 ; first 2 MB mov [edi+8], dword 0x14003 ; second 2 MB ; higher half PDT zeropage32 0x16000 mov edi, 0x16000 mov [edi], dword 0x017003 mov [edi+8], dword 0x018003 ; page table ; map the first 4 MB ; Don't need to zero-page these, because we're going ; to fill them anyway mov edi, 0x13000 mov eax, 0x17000 mov ebx, 0x3 mov ecx, 1024 ; i.e. two tables worth .donext: mov [edi], ebx mov [eax], ebx add ebx, 0x1000 add eax, 8 add edi, 8 loop .donext mov eax, PML4ADDR mov cr3, eax ; paging is off, so this is safe ; enable PAE mov eax, cr4 or eax, 0x20 mov cr4, eax ; enable long mode mov ecx, 0xC0000080 rdmsr or eax, 0x100 wrmsr ; enable paging and enter long mode (still 32-bit) mov eax, cr0 or eax, 0x80000000 mov cr0, eax ; Load the long mode GDT. mov eax, GDT_load lgdt [GDT_load] ; set the SS register. Probably not needed mov ax, 0x18 ; 0x18 is 64 bit kernel data segment mov ss, ax ; jump to the 64 bit code segment jmp 0x10:bootstrap64 USE64 align 8 bootstrap64: ; TODO set up the higher half map here. easier to do in 64 bit code ; from the AMD manual... ; System software must create at least one 64-bit TSS for use after ; activating long mode, and it must execute the LTR instruction, in ; 64-bit mode, to load the TR register with a pointer to the 64-bit TSS ; that serves both 64-bit-mode programs and compatibility-mode ; programs. ; fill in the TSS selector with the address of the TSS mov rax, qword TSS mov rbx, qword TSS_selector ; load up the address fragments mov [rbx + 2], ax shl rax, 16 mov [rbx + 4], al shl rax, 8 mov [rbx + 7], al shl rax, 8 mov [rbx + 8], eax ; calculate the TSS segment offset mov rax, GDT64 sub rbx, rax ; rbx still holds the TSS_selector address ; and load it LTR bx ; and jump to the start code mov rax, qword kernel_bootstrap jmp rax USE32 section .data global GDT64:data global GDT_load:data global GDT_tss:data %define MAX_CPUS 2 ; 00 00 00 00 ; II GI A? II ; G = -01- ---- so G = 2 always, unless you want to use the avl bit and make it 3 ; or G = 4 for a 32 bit segment ; A = 1pl1 1CRA I don't think C matters, and RA should be ignored, so 8? ; but in principle C == 1 for code? ; R == 1 (i.e +2 for read write). Supposedly ignored, but should't hurt ; so, 8 + 0 + 2 + 0 == non conforming read write ; Thus 00 20 9A 00 for code ; 00 20 F2 00 for user code ; 00 L0 AC 00 ; L = 2 for 64 bit, 4 for 32 bit ; A = 9 for kernel, F for user ; C = A for code, 2 for data ;summary ; 0x00209A00 ; 64 bit kernel code ; 0x00209200 ; 64 bit kernel data ; 0x GDT64: dd 0,0 dd 0,0 dd 0x00000000, 0x00209A00 ; 0x10: 64-bit Kernel Code dd 0x00000000, 0x00209200 ; 0x18: 64-bit Kernel Data dd 0x00000000, 0x0040FA00 ; 0x20: 32-bit User Code, should be unused dd 0x00000000, 0x0040F200 ; 0x28: 32-bit User Data dd 0x00000000, 0x0020FA00 ; 0x30: 64-bit User Code dd 0x00000000, 0x0000F200 ; 0x38: 64-bit User Data TSS_selector: times MAX_CPUS dd 0, 0x00008900, 0, 0 ; 0x38+16*n: TSS 0 align 16 GDT_load: dw GDT_load - GDT64 - 1 ; limit dq GDT64 ; base dd 0 ; pad align 16 TSS: dd 0 ; ignored dq 0 ; rsp for cpl 0 dq 0 ; rsp for cpl 1 dq 0 ; rsp for cpl 2 times 2 dd 0 ; ignored times 7 dq 0 ; rsp for ist 1-7 times 2 dd 0 ; ignored dd 0 ; ignored dd 0 ; base address for io permission bitmap GDT_tss: dq TSS_selector - GDT64