From: Nathan Wagner
Date: Tue, 25 Oct 2016 01:34:47 +0000 (-0500)
Subject: processes and multitasking
X-Git-Url: https://pd.if.org/git/?p=zos;a=commitdiff_plain;h=7c1b1bcd0cd0b3e3738afe96aa9b94a275a7951b

processes and multitasking
---

diff --git a/process.c b/process.c
new file mode 100644
index 0000000..046247e
--- /dev/null
+++ b/process.c
@@ -0,0 +1,472 @@
+#include
+#include
+
+#include "kernel.h"
+#include "mem.h"
+#include "process.h"
+
+#include "timer.h"
+#include "interrupt.h"
+#include
+
+#include
+#include
+#include
+
+/* 8 MB stack */
+#define RLIMIT_STACK_VAL 0x800000
+
+/*
+ * memory map for processes
+ * 0xFFFF FFFF F800 0000 and up, kernel, can copy kernel pml4 entries
+ * program load at 4 MB
+ * kernel stack at 128 TB 7F...
+ * stack at 127 TB = 7F00 0000 0000 (and down)/ thread stacks go somewhere
+ * stacks could be limited to physmem. perhaps it's ok for each address space
+ * to have its own total stacks limit
+ * mmaps at 126 TB = 7E00 0000 0000 (and down)
+ * heap at 96 TB = 6000 0000 0000 (and up)
+ * heap at 2 TB?
+ * heap after program load...
+ * program at 4 MB = 40 0000
+ */
+
+/* so, to create a process,
+ * allocate 2 MB for stack,
+ * allocate whatever is needed for the code
+ * load in the code to the code base
+ * set up the registers
+ * set up the stack for the iretq or sysret
+ * iretq or sysret to start the task
+ */
+
+/* current process is per processor? 
*/
+struct process *current_task = 0;
+static struct process *available = 0; /* a stack of unused task structs */
+static struct process mainTask;
+static struct process *idle_task;
+
+/* Body of the idle task: halt until something happens, then ask for a
+ * reschedule so any task that became runnable gets the cpu. */
+static void idle_task_main() {
+	while (1) {
+		halt();
+		schedule();
+	}
+}
+
+/*
+ * A queue of processes linked through process.next / process.prev.
+ *
+ * The list is singly terminated going forward (the last element has
+ * next == 0) but head->prev always points at the last element, which
+ * gives O(1) append without a separate tail pointer.
+ * NOTE(review): 'tail' is cleared by initqueue() but none of the queue
+ * functions in this file update or read it.
+ */
+struct pqueue {
+	struct process *head;
+	struct process *tail;
+	struct spinlock_t lock;
+	unsigned int size;	/* number of queued processes */
+};
+
+static struct pqueue sleepqueue;
+static struct pqueue runqueue;
+static struct pqueue terminating;
+
+/* Reset a queue to empty with its lock released. */
+void initqueue(struct pqueue *q) {
+	q->lock = (struct spinlock_t){0};
+	q->head = 0;
+	q->tail = 0;
+	q->size = 0;
+}
+
+/*
+ * Debug helper: print the pids on a queue, head first, on one line.
+ * A task with a non-zero wake-up tick is printed as "pid:tick".
+ * 'name' labels the line; when it is null the queue address is printed
+ * instead.  The queue lock is not taken while walking the list.
+ */
+void dumpqueue(struct pqueue *q, char *name) {
+	struct process *task;
+
+	if (name) {
+		printk("%u %s:", timer_ticks, name);
+	} else {
+		printk("%u queue %llx:", timer_ticks, q);
+	}
+
+	for (task = q->head; task; task = task->next) {
+		printk(" %u", task->pid);
+		if (task->sleep) {
+			/* sleep is a uint64_t; print all 64 bits instead of
+			 * passing it to a 32 bit %u conversion */
+			printk(":%llu", (unsigned long long)task->sleep);
+		}
+	}
+	printk("\n");
+}
+
+/*
+ * Insert a task into a queue kept sorted by ascending wake-up tick
+ * (task->sleep).  The task goes in front of the first queued task that
+ * wakes strictly later, so tasks with equal wake-up ticks stay in
+ * arrival order.
+ */
+/* dequeue sleeper is no different than a regular dequeue */
+void enqueue_sleeper(struct pqueue *q, struct process *task) {
+	struct process *sleeper;
+
+	spinlock_acquire(&q->lock);
+
+	task->next = 0;
+	task->prev = 0;
+
+	if (!q->head) {
+		/* only element: it is its own tail */
+		q->head = task;
+		task->prev = task;
+	} else {
+		for (sleeper = q->head; sleeper; sleeper = sleeper->next) {
+			if (task->sleep < sleeper->sleep) {
+				/* insert before this one */
+				task->next = sleeper;
+				task->prev = sleeper->prev;
+				if (sleeper != q->head) {
+					sleeper->prev->next = task;
+				} else {
+					/* new head inherits the tail link
+					 * through task->prev set above */
+					q->head = task;
+				}
+				sleeper->prev = task;
+				break;
+			}
+		}
+
+		if (!sleeper) {
+			/* if we got here, we're the last task */
+			task->next = 0;
+			task->prev = q->head->prev;
+			task->prev->next = task;
+			q->head->prev = task;
+		}
+	}
+
+	q->size++;
+
+	spinlock_release(&q->lock);
+}
+
+/* Append a task to the end of a queue. */
+void enqueue(struct pqueue *q, struct process *task) {
+	spinlock_acquire(&q->lock);
+
+	task->next = 0;
+	if (q->head) {
+		/* head->prev is the current tail */
+		task->prev = q->head->prev;
+		task->prev->next = task;
+		q->head->prev = task;
+	} else {
+
q->head = task;
+		task->prev = task;
+	}
+	q->size++;
+
+	spinlock_release(&q->lock);
+}
+
+/*
+ * Remove and return the task at the head of a queue, or 0 when the
+ * queue is empty.  The returned task has its next/prev links cleared.
+ */
+struct process *dequeue(struct pqueue *q) {
+	struct process *task;
+
+	spinlock_acquire(&q->lock);
+	task = q->head;
+	if (task) {
+		q->head = task->next;
+		if (q->head) {
+			/* the new head takes over the link to the tail */
+			q->head->prev = task->prev;
+		}
+		task->next = 0;
+		task->prev = 0;
+		/* only count a removal that actually happened; size is
+		 * unsigned and would wrap on an empty queue */
+		q->size--;
+	}
+	spinlock_release(&q->lock);
+	return task;
+}
+
+/* Append a task to the run queue. */
+void enqueue_runnable(struct process *task) {
+	enqueue(&runqueue, task);
+}
+
+/* Take the next task off the run queue, or 0 if none is waiting. */
+struct process *dequeue_runnable(void) {
+	return dequeue(&runqueue);
+}
+
+/* Return the pid of the task currently on the cpu. */
+pid_t getpid() {
+	return current_task->pid;
+}
+
+void yield(void);
+
+/*
+ * Terminate the calling task, recording 'status' in its process struct.
+ * Does not return: the task is marked not runnable, so the preemption
+ * handler will not put it back on the run queue, and it then yields
+ * until it is switched away.
+ * Nothing is freed and nobody is notified yet (see the TODO below).
+ */
+void exit(int status) {
+	current_task->status = status;
+
+	/* put in parents process "exited" queue for wait() */
+
+	current_task->flags &= ~TM_RUNNABLE;
+	while (1) {
+		yield();
+	}
+}
+
+/*
+ * Start tearing down process p: currently this only appends it to the
+ * 'terminating' queue and requests a reschedule.
+ * NOTE(review): p is not removed from whatever queue it is already on
+ * before being appended here, and its next/prev links are shared by all
+ * queues - confirm callers only pass tasks that are on no queue.
+ */
+void terminate(struct process *p) {
+
+	/* remove it from whatever queue it's in */
+	/* enqueue it into terminating queue */
+
+	/* push it on the top of the runqueue */
+	enqueue(&terminating, p);
+	/* free all memory, except maybe kernel stack */
+
+	/* remove from task queues */
+
+	/* add to available */
+	schedule();
+}
+
+/*
+ * Put the calling task to sleep until timer_ticks reaches
+ * (now + ticks).  Panics if the task already has a wake-up tick set.
+ * NOTE(review): the enqueue and the clearing of TM_RUNNABLE are not
+ * protected against the timer interrupt; if a tick lands in between,
+ * the preemption handler can see a still-runnable task that is already
+ * on the sleep queue.
+ */
+void sleep(uint32_t ticks) {
+	if (current_task->sleep) {
+		/* sleep is a uint64_t, print it as one */
+		panic("tried to sleep pid %u which was already sleeping until %llu\n", current_task->pid, (unsigned long long)current_task->sleep);
+	}
+	current_task->sleep = timer_ticks + ticks;
+	enqueue_sleeper(&sleepqueue, current_task);
+	current_task->flags &= ~TM_RUNNABLE;
+	yield();
+}
+
+/* Test task "A": after an initial sleep, print a greeting and sleep for
+ * a pid-dependent number of ticks, forever. */
+static void third_main() {
+	sleep(500);
+	while (1) {
+		printk("%llu: Hello from A pid %lu, cpl = %x\n", timer_ticks, getpid(), current_task->pl);
+		sleep(300+300 * getpid());
+	}
+}
+
+/* Test task "B": print a greeting every 10000 ticks, forever. */
+static void other_main() {
+	while (1) {
+		printk("%llu: Hello from B pid %lu\n", timer_ticks, getpid());
+		sleep(10000);
+	}
+}
+
+#define HOG
+#ifdef HOG
+/* Test task that never sleeps, to exercise preemption; it reports that
+ * it is still running each time timer_ticks catches up with 'donetil'. */
+static void hog_main() {
+	/* unsigned: this counter is incremented forever and a signed
+	 * counter would eventually overflow, which is undefined */
+	static unsigned int x = 0;
+	static volatile uint64_t donetil = 0;
+	printk("first scheduled hog\n");
+	while (1) {
+		x++;
+		if (timer_ticks >= donetil) {
+			printk("%llu hog %u running\n", 
timer_ticks,current_task->pid);
+			donetil += 1000;
+		}
+	}
+}
+#endif
+
+struct interrupt_handler pih;
+
+/*
+ * Timer tick handler (registered on IRQ0 by init_tasking).
+ *
+ * 1. Moves every task whose wake-up tick has passed from the sleep queue
+ *    to the run queue.
+ * 2. Charges one tick to the running task, or, when the idle task is
+ *    running, simply requests a reschedule.
+ * 3. When the running task has no quantum left, puts it back on the run
+ *    queue (if it is still runnable and is not the idle task) and
+ *    requests a reschedule.
+ *
+ * NOTE(review): quantum is unsigned and is decremented unconditionally,
+ * so a task that is current with quantum == 0 (e.g. mainTask, which
+ * never goes through do_schedule) wraps to a huge value here.
+ * NOTE(review): sleepqueue.head is read without taking the queue lock.
+ */
+static void preemption_interrupt(struct interrupt_context *c, void *n) {
+	struct process *woken;
+	int idle_running;
+
+	/* check for sleepers */
+	while (sleepqueue.head && sleepqueue.head->sleep <= timer_ticks) {
+		woken = dequeue(&sleepqueue);
+		woken->sleep = 0;
+		woken->flags |= TM_RUNNABLE;
+		enqueue_runnable(woken);
+	}
+
+	idle_running = (current_task == idle_task);
+	if (idle_running) {
+		schedule();
+	} else {
+		current_task->quantum--;
+	}
+
+	/* time slice not used up yet: nothing more to do this tick */
+	if (current_task->quantum != 0) {
+		return;
+	}
+
+	if (!idle_running && (current_task->flags & TM_RUNNABLE)) {
+		enqueue_runnable(current_task);
+	}
+	schedule();
+}
+
+void usermain(void);
+
+/*
+ * Set up multitasking: initialise the queues, describe the code that is
+ * running now as mainTask (pid 0), create the idle task and a set of
+ * test tasks, and hook the preemption handler onto IRQ0.
+ */
+void init_tasking() {
+	uint64_t rflags;
+	int i;
+
+	initqueue(&runqueue);
+	initqueue(&sleepqueue);
+	initqueue(&terminating);
+
+	/* mainTask inherits the address space and flags we run with now */
+	mainTask.reg.cr3 = getcr3();
+	mainTask.reg.rflags = getrflags();
+	mainTask.reg.cs = 0x10;
+	mainTask.reg.ss = 0x18;
+	mainTask.pl = 0x0;
+	mainTask.pid = 0;
+	mainTask.flags = 0;
+
+	rflags = mainTask.reg.rflags;
+
+	/* the idle task is never queued; do_schedule falls back to it */
+	idle_task = new_task(idle_task_main, rflags, MEM_KERNEL, TASK_KERNEL|TASK_NOSCHED);
+
+	/* TODO move these into a dummy/test function */
+	new_task(other_main, rflags, MEM_KERNEL, TASK_KERNEL);
+	for (i = 0; i < 3; i++) {
+		new_task(third_main, rflags, MEM_KERNEL, TASK_KERNEL);
+	}
+#ifdef HOG
+	new_task(hog_main, rflags, mainTask.reg.cr3, TASK_KERNEL);
+#endif
+
+	/* one user mode task in a fresh address space */
+	new_task(usermain, rflags, create_addrspace(), 0);
+
+	current_task = &mainTask;
+	pih.handler = preemption_interrupt;
+	pih.context = 0;
+	interrupt_add_handler(IRQ0, &pih);
+	printk("set up tasking\n");
+}
+
+/*
+ * Allocate a process struct and initialise it with create_task().
+ * Returns the new process; panics if the allocation fails.
+ */
+struct process *new_task(void (*main)(), uint64_t rflags, uint64_t pagedir, uint64_t flags) {
+	struct process *p;
+
+	p = koalloc(sizeof *p);
+	if (!p) {
+
panic("can't allocate memory for new task\n");
+		return 0;
+	}
+	create_task(p, main, rflags, pagedir, flags);
+	return p;
+}
+
+void setup_usertask(void);
+
+static pid_t next_pid = 2;
+
+/*
+ * Initialise a caller-supplied process struct so that the first switch
+ * to it starts executing 'main' on a freshly allocated kernel stack.
+ *
+ *   task      - struct to fill in (contents on entry are ignored,
+ *               except pid when the 'available' list is in use)
+ *   main      - entry point
+ *   rflags    - initial rflags for the task
+ *   addrspace - value loaded into cr3 when the task runs
+ *   flags     - TASK_KERNEL: run 'main' directly in kernel mode;
+ *               otherwise the task starts in setup_usertask(), which
+ *               copies 'main' into user space and drops to user mode.
+ *               TASK_NOSCHED: do not put the task on the run queue.
+ *
+ * Returns the task's pid.
+ */
+pid_t create_task(struct process *task, void (*main)(), uint64_t rflags, uint64_t addrspace, uint64_t flags) {
+	int user = !(flags & TASK_KERNEL);
+
+	task->reg = (struct registers){ 0 }; /* clear out all registers for new task */
+
+	/* The struct may come straight from the allocator, so give every
+	 * bookkeeping field a defined value.  In particular a non-zero
+	 * 'sleep' makes sleep() panic, and the queue code follows
+	 * next/prev. */
+	task->ppid = 0;
+	task->sleep = 0;
+	task->status = 0;
+	task->next = 0;
+	task->prev = 0;
+	task->stacks = 0;
+	task->usp = 0;
+	task->heap = 0;
+	task->main = 0;
+
+	/* roll over? re-use? */
+	/* could possibly use bts on max nums. possibly wasteful, and still O(n) */
+	/* NOTE(review): when 'available' is non-empty the pid is left as
+	 * the caller provided it; nothing in this file fills 'available' */
+	if (!available) {
+		task->pid = next_pid++;
+	}
+
+	task->reg.rflags = rflags;
+	task->reg.cr3 = addrspace;
+	task->kstack = (uint64_t)PHY2VIRTP(palloc()); /* new kernel stack */
+	task->reg.rsp = task->kstack + 0x1000; /* stack grows down from the top of the page */
+	task->reg.rip = (uint64_t)main;
+	task->reg.cs = 0x10; /* kernel code segment */
+	task->reg.ss = 0x18; /* kernel data segment */
+	task->pl = 0; /* not a user task */
+	task->quantum = 0; /* will get a quantum when it's scheduled */
+	task->flags = TM_RUNNABLE; /* new tasks are runnable */
+
+	/* stacks will need thinking */
+	/* stack can't just go at fixed 128 TB, threads need their own */
+	if (user) {
+		/* user tasks start in the kernel, in setup_usertask() */
+		task->main = (int (*)(int,char**))main;
+		task->reg.rdi = (uintptr_t)task->main;
+		task->reg.rip = (uintptr_t)setup_usertask;
+		task->flags |= TM_USER;
+
+		printk("created user task %u %llx, space = %llx, entry = %llx\n", task->pid, task, task->reg.cr3, task->reg.rip);
+	}
+
+	if (!(flags & TASK_NOSCHED)) {
+		enqueue_runnable(task);
+	}
+
+	return task->pid;
+}
+
+
+/*
+ * First code run by a user task, still in kernel mode: maps and fills
+ * the user code and stack in the task's address space, then enters
+ * user mode through usermodetrampoline().  Does not return.
+ */
+void setup_usertask(void) {
+	struct process *task;
+	task = current_task;
+
+	/* need a sysret trampoline for user tasks */
+	//task->reg.rip = (uint64_t)usermodetrampoline;
+	//task->reg.rcx = (uint64_t)main;
+	// hmm. if this is in kernel space, it's going to fail after the sysret
+	// we need to copy the code into the user space, and hope it's PIC... 
+
+	/* create a 1 meg map for the user code at the 8 GB mark */
+	printk("setting up usertask %u, space = %llx\n", task->pid, getcr3());
+	vmapz(task->reg.cr3, GB(8), MB(1), MEM_USERSPACE|MEM_RW);
+	printk("mapped 1 MB for user\n");
+	/* copy the user main function to the GB 8 mark */
+	memmove((void *)GB(8), task->main, KB(4));
+	printk("copied main func from %llx to %llx\n", task->main, GB(8));
+	/* hmm, not mapped here..., so have to map it in the trampoline
+	 * so it has the user mappings
+	 */
+	/* map it into kernel space? will need a mutex on that */
+	/* then just copy the tables? */
+
+	task->reg.rip = GB(8);
+	task->pl = 3;
+
+#if 0
+	task->reg.cs = 0x20;
+	task->reg.ss = 0x28;
+#endif
+
+	/* create the user mode stack: 2 MB ending at the 127 TB mark.
+	 * A stack has to be writable, so map it MEM_RW like the other
+	 * mappings made in this file. */
+	task->stacks = TB(127);
+	vmapz(task->reg.cr3, task->stacks - MB(2), MB(2), MEM_USERSPACE|MEM_RW);
+	task->usp = task->stacks; /* user stack pointer */
+
+	/* What if this fails? */
+	/* need a kernel mode stack for syscalls, and possibly interrupts */
+#if 0
+	task->kstack = TB(128);
+	printk("creating 4 KB kernel stack at virtual address %llx\n", task->kstack-KB(4));
+	vmapz(addrspace, task->kstack - KB(4), KB(4), MEM_RW|MAP_TRACE);
+	task->reg.rsp = task->kstack;
+#endif
+	printk("user task %u, kstack rsp = %llx\n", task->pid, task->kstack);
+	//print_decode(0, task->reg.cr3, task->reg.rsp-8);
+#if 0
+	test_address((void *)(task->reg.rsp - 8));
+	allstop();
+#endif
+	/* this won't return */
+	printk("user tramp\n");
+	/* we loaded to the 8 GB mark... */
+	usermodetrampoline( (void (*)()) GB(8), task->reg.rflags);
+}
+
+/* Flag the current task as needing a reschedule.  This only sets
+ * TM_SCHEDULE; the actual switch happens in do_schedule(). */
+void schedule(void) {
+	current_task->flags |= TM_SCHEDULE;
+}
+
+/* set the current task for scheduling, and if we're not in an interrupt,
+ * immediately re-schedule. could just halt for now, which will wait for the
+ * next interrupt.
+ */
+void yield(void) {
+	schedule();
+	/* TODO check if in interrupt. if not, then call one? */
+	halt();
+}
+
+/* Non-zero when there is a current task and it has TM_SCHEDULE set. */
+int need_schedule(void) {
+	return current_task ? 
current_task->flags & TM_SCHEDULE : 0;
+}
+
+/* probably want to use lock free queue with compare and swap
+ * see http://preshing.com/20120612/an-introduction-to-lock-free-programming/
+ */
+
+/*
+ * Pick the next task and switch to it.
+ *
+ * The next task is the head of the run queue, or the idle task when the
+ * run queue is empty.  Its TM_SCHEDULE flag is cleared.  If it is the
+ * task that is already running it just gets a fresh 10 tick quantum;
+ * otherwise it gets 20 ticks, becomes current_task, and the register
+ * state is switched.
+ */
+void do_schedule(void) {
+	struct process *prev = current_task;
+	struct process *next = dequeue_runnable();
+
+	if (!next) {
+		/* no runnable task */
+		next = idle_task;
+	}
+
+	next->flags &= ~TM_SCHEDULE; /* clear the schedule flag */
+
+	/* don't go through the trouble if we're not changing tasks */
+	if (next == prev) {
+		next->quantum = 10; /* smaller slice for same task */
+		return;
+	}
+
+	next->quantum = 20;
+	dumpqueue(&runqueue, "runqueue");
+	current_task = next;
+	switch_task(&prev->reg, &next->reg);
+}
+
+/* Request a reschedule of the current task. */
+void preempt() {
+	schedule();
+}
diff --git a/process.h b/process.h
new file mode 100644
index 0000000..49d24a5
--- /dev/null
+++ b/process.h
@@ -0,0 +1,120 @@
+#ifndef PROCESS_H_
+#define PROCESS_H_
+
+#include
+#include
+
+#include
+
+#include
+
+/* 8 MB stack */
+#define RLIMIT_STACK_VAL 0x800000
+
+#define TASK_KERNEL 0x1
+#define TASK_NOSCHED 0x2
+
+/*
+ * memory map for processes
+ * 0xFFFFFFFF8000000 and up, kernel, can copy kernel pml4 entries
+ * program load at 4 MB
+ * stack at 128 TB = 8000 0000 0000 (and down)
+ * mmaps at 127 TB = 7F00 0000 0000 (and down)
+ * heap at 96 TB = 6000 0000 0000 (and up)
+ * heap at 2 TB?
+ * heap after program load... 
+ * program at 4 MB = 40 0000
+ */
+
+/* so, to create a process,
+ * allocate 2 MB for stack,
+ * allocate whatever is needed for the code
+ * load in the code to the code base
+ * set up the registers
+ * set up the stack for the iretq or sysret
+ * iretq or sysret to start the task
+ */
+
+struct registers { /* saved cpu state; field order is mirrored by 'struc reg' in taskx64.s and must stay in sync */
+	uint64_t kerrno;
+	uint64_t rax;
+	uint64_t rbx;
+	uint64_t rcx;
+	uint64_t rdx;
+	uint64_t rdi;
+	uint64_t rsi;
+	uint64_t rsp;
+	uint64_t rbp;
+	uint64_t r8;
+	uint64_t r9;
+	uint64_t r10;
+	uint64_t r11;
+	uint64_t r12;
+	uint64_t r13;
+	uint64_t r14;
+	uint64_t r15;
+	uint64_t rip; /* where the task resumes; switch_task returns to this address */
+	uint64_t rflags;
+	uint64_t cr3; /* address space; switch_task reloads cr3 only when it differs */
+	uint64_t kernel_stack;
+	uint64_t cs;
+	uint64_t ss;
+	__attribute__((aligned(16))) uint8_t fpuenv[512]; /* not part of 'struc reg'; presumably an FXSAVE area - TODO confirm */
+} __attribute__((packed));
+
+/* flags bits */
+
+/* user or kernel process */
+#define TM_USER 0x1
+/* needs re-scheduled */
+#define TM_SCHEDULE 0x2
+/* in syscall */
+#define TM_SYSCALL 0x4
+/* in interrupt */
+#define TM_INTERRUPTED 0x8
+
+#define TM_RUNNABLE 0x10 /* set for new and woken tasks, cleared by sleep(); the tick handler only re-queues tasks that have it */
+
+struct process { /* one schedulable task */
+	pid_t pid;
+	pid_t ppid;
+
+	/* todo just use a user or kernel process, maybe a flag */
+	int pl; /* privilege level, 0 for kernel, 3 for users */
+	uint64_t flags; /* bit flags */
+
+	struct registers reg;
+	vaddr_t kstack; /* pointer to the kernel stack */
+	vaddr_t stacks;
+	vaddr_t usp; /* user stack pointer */
+	vaddr_t heap;
+	struct process *next, *prev; /* scheduling */
+
+	/* sleep should probably be a struct timespec */
+	uint64_t sleep; /* don't need to sleep for 64 bits of ticks */
+
+	int status; /* return status from main/exit */
+	unsigned int quantum; /* how many more ticks */
+	// struct inode *cwd;
+	// struct sigaction_t sigaction[NUM_SIGNALS];
+	// uint64_t signal_pending;
+	int (*main)(int ac, char **av);
+};
+
+uint64_t getrflags(); /* assembly */
+void init_tasking();
+
+pid_t create_task(struct process *task, void (*main)(), uint64_t rflags, uint64_t pagedir, uint64_t flags);
+struct process *new_task(void (*entry)(), 
uint64_t rflags, uint64_t pagedir, uint64_t flags);
+
+/* put the calling task to sleep for 'ticks' timer ticks */
+void sleep(uint32_t ticks);
+/* The scheduler entry points take no arguments; declare them with
+ * (void), matching their definitions, so a call that passes arguments
+ * is diagnosed. */
+void schedule(void);
+void do_schedule(void);
+int need_schedule(void);
+void preempt(void); // Switch task frontend
+void switch_task(struct registers *old, struct registers *new);
+void switch_task_iret(struct registers *old, struct registers *new);
+
+/* in assembly */
+void usermodetrampoline(void (*main)(), uint64_t rflags);
+#endif
diff --git a/taskx64.s b/taskx64.s
new file mode 100644
index 0000000..5b2f815
--- /dev/null
+++ b/taskx64.s
@@ -0,0 +1,274 @@
+extern allstop
+extern printk
+extern print_decode_p
+extern test_address
+
+; uint64_t getrflags(void): return the current rflags in rax
+global getrflags:function
+getrflags:
+	pushf
+	mov rax, [rsp]
+	popf
+	ret
+
+global switch_task:function
+
+; this needs to be in sync with the definition in process.h
+struc reg
+	.kerrno resq 1 ;
+	.rax resq 1 ;
+	.rbx resq 1 ;
+	.rcx resq 1 ;
+	.rdx resq 1 ;
+	.rdi resq 1 ;
+	.rsi resq 1 ;
+	.rsp resq 1 ;
+	.rbp resq 1 ;
+	.r8 resq 1 ;
+	.r9 resq 1 ;
+	.r10 resq 1 ;
+	.r11 resq 1 ;
+	.r12 resq 1 ;
+	.r13 resq 1 ;
+	.r14 resq 1 ;
+	.r15 resq 1 ;
+	.rip resq 1 ;
+	.rflags resq 1 ;
+	.cr3 resq 1 ;
+	.kernel_stack resq 1 ;
+	.cs resq 1 ;
+	.ss resq 1 ;
+endstruc
+
+%macro multipush 1-*
+
+	%rep %0
+	push %1
+	%rotate 1
+	%endrep
+
+%endmacro
+
+; void switch_task(struct registers *old, struct registers *new)
+; Saves the caller's registers, return address, rflags and cr3 into
+; *old, then loads the same set from *new and returns on the new task's
+; stack to the new task's saved rip.
+; first arg rdi is old register pointer, second arg is new register pointer
+switch_task:
+	; save old registers
+	mov [rdi + reg.rax], rax
+	mov [rdi + reg.rbx], rbx
+	mov [rdi + reg.rcx], rcx
+	mov [rdi + reg.rdx], rdx
+	mov [rdi + reg.rdi], rdi ; a pointer to the current task
+	mov [rdi + reg.rsi], rsi ; though rsi is just a pointer to the new task...
+	mov [rdi + reg.rbp], rbp
+	mov [rdi + reg.r8], r8
+	mov [rdi + reg.r9], r9
+	mov [rdi + reg.r10], r10
+	mov [rdi + reg.r11], r11
+	mov [rdi + reg.r12], r12
+	mov [rdi + reg.r13], r13
+	mov [rdi + reg.r14], r14
+	mov [rdi + reg.r15], r15
+	; save rip for return from stack
+	; mov? 
we're going to switch the stack anyway
+	pop rax ; rax = return address pushed by the call to switch_task
+	mov [rdi + reg.rip], rax
+
+;	push rdi
+;	push rsi
+;	mov rdi, qword printrrip
+;	mov rsi, [rsp]
+;	call printk
+;	pop rsi
+;	pop rdi
+
+	mov [rdi + reg.rsp], rsp ; rsp as it will be once the old task has "returned"
+
+	; save rflags
+	pushf
+	pop rax
+	mov [rdi + reg.rflags], rax
+	; and the address space
+	; TODO skip, shouldn't change
+	mov rax, cr3
+	mov [rdi + reg.cr3], rax
+
+.loadregisters
+	; Ok. now load up the new registers
+	; we're going to use rax and rbx, and rsi needs
+	; to be last
+	mov rcx, [rsi + reg.rcx]
+	mov rdx, [rsi + reg.rdx]
+	mov rdi, [rsi + reg.rdi]
+	mov rbp, [rsi + reg.rbp]
+	mov r8, [rsi + reg.r8]
+	mov r9, [rsi + reg.r9]
+	mov r10, [rsi + reg.r10]
+	mov r11, [rsi + reg.r11]
+	mov r12, [rsi + reg.r12]
+	mov r13, [rsi + reg.r13]
+	mov r14, [rsi + reg.r14]
+	mov r15, [rsi + reg.r15]
+
+;	push rdi
+;	push rsi
+;	mov rdi, qword printsp
+;	mov rsi, [rsi + reg.rsp]
+;	call printk
+;	pop rsi
+;	pop rdi
+
+
+	; and the address space
+	; rsi should be pointing into the kernel
+	; memory, so we can still load from that
+	; address
+	mov rax, cr3
+	mov rbx, [rsi + reg.cr3]
+	mov dr1, rax ; old cr3 parked in a debug register - presumably a debugging aid
+	mov dr2, rbx ; new cr3, likewise
+	cmp rbx, rax
+	je .skipspaceload ; same address space: leave cr3 alone
+
+	mov cr3, rbx
+	mov rbx, cr3
+	mov dr3, rbx
+	; switch to new stack, has to be after the address space switch, otherwise the
+	; stack isn't mapped
+
+.skipspaceload:
+
+	mov rsp, [rsi + reg.rsp]
+
+	; load rflags
+	mov rax, [rsi + reg.rflags]
+	;call printrax
+	push rax
+	popf ; NOTE(review): if the saved rflags has IF set, interrupts are enabled from here, before the switch is complete
+
+	; push rip for return from stack
+	; must be done after we switch address space
+	mov rax, [rsi + reg.rip]
+	push rax
+
+;	push rdi
+;	push rsi
+;	mov rdi, qword printrip
+;	;mov rsi, [rsi + reg.rip]
+;	mov rsi, [rsp]
+;	call printk
+;	pop rsi
+;	pop rdi
+
+	; restore registers we used for the switch
+	mov rbx, [rsi + reg.rbx]
+	mov rax, [rsi + reg.rax]
+	mov rsi, [rsi + reg.rsi] ; have to do this one last
+
+	ret ; pops the rip pushed above: execution continues in the new task
+
+global usermodetrampoline:function
+
+usermodetrampoline:
+	; sysret wants return rip in rcx
+	; rflags in r11
+
+	; 
user mode function in rdi (i.e. first argument)
+	push rdi ; keep the entry point across the debug prints below
+
+	mov rax, rdi
+	call printrax
+	mov ecx, 0xc0000082 ; MSR 0xC0000082 is IA32_LSTAR, the 64 bit syscall entry address
+	rdmsr
+	shl rdx, 32
+	or rdx, rax ; combine edx:eax into one 64 bit value
+	mov rax, rdx
+	call printrax
+
+; TODO clear out all the other registers to not leak anything
+; to userspace
+
+	pop rcx ; rcx = user entry point, where sysret will jump
+	mov r11, 0x202 ; make sure interrupts will be enabled (note: the rflags argument in rsi is not used)
+	mov dr0, rdi
+thesysret:
+	db 0x48 ; need rex prefix for 64 bit mode returns
+	sysret
+
+section .data
+printcr3: db 'cr3 = %llx', 0xA, 0
+printsp: db 'setting rsp %llx', 0xA, 0
+printrip: db 'returning to %llx', 0xA, 0
+printrrip: db 'saving return to %llx', 0xA, 0
+
+printcr3s: db 'old cr3 = %llx, new cr3 = %llx', 0xA, 0
+
+section .text
+dumpcr3: ; debug: printk rax and rbx as old/new cr3; saves and restores the registers it pushes
+	push rdi
+	push rsi
+	push rdx
+	push rbx
+	push rax
+	mov rdi, qword printcr3s
+	mov rsi, rax
+	mov rdx, rbx
+	call printk
+	pop rax
+	pop rbx
+	pop rdx
+	pop rsi
+	pop rdi
+	ret
+
+printraxs: ; debug: like printrax below, but calls allstop after printing
+	push rdi
+	push rsi
+	push rax
+	mov rdi, qword raxprintf
+	mov rsi, rax
+	call printk
+	call allstop
+	pop rax
+	pop rsi
+	pop rdi
+	ret
+printrax: ; debug: printk the value of rax; rdi, rsi and rax are saved and restored
+	push rdi
+	push rsi
+	push rax
+	mov rdi, qword raxprintf
+	mov rsi, rax
+	call printk
+	pop rax
+	pop rsi
+	pop rdi
+	ret
+
+global printistack:function
+printistack: ; debug: print five qwords at [rbx+0..32], labelled rip, cs, rflags, rsp, ss
+	push rdi
+	push rsi
+	mov rdi, qword ssprintf
+	mov rsi, [rbx + 32]
+	call printk
+	mov rdi, qword rspprintf
+	mov rsi, [rbx + 24]
+	call printk
+	mov rdi, qword rfprintf
+	mov rsi, [rbx + 16]
+	call printk
+	mov rdi, qword csprintf
+	mov rsi, [rbx + 8]
+	call printk
+	mov rdi, qword ripprintf
+	mov rsi, [rbx + 0]
+	call printk
+	pop rsi
+	pop rdi
+	ret
+
+rfprintf: db 'rflags : %llx', 10, 0
+csprintf: db 'cs : %hx', 10, 0
+ripprintf: db 'rip : %llx', 10, 0
+raxprintf: db 'rax : %llx', 10, 0
+rspprintf: db 'rsp : %llx', 10, 0
+ssprintf: db 'ss : %hx', 10, 0
+csvprintf: db 'gdt : %016llx', 10, 0
diff --git a/usermain.s b/usermain.s
new file mode 100644
index 0000000..951baa3
--- /dev/null
+++ b/usermain.s
@@ -0,0 +1,14 @@
+global usermain:function
+
+usermain:
+	xor r12, r12
+.redo
+
inc r12 ; r12 counts loop iterations
+	mov rsi, r12 ; second syscall argument: the iteration count
+	mov eax, 0xca11d00d
+	mov rdi, 100
+	syscall
+	cmp r12, 4 ; test the loop counter; r13 is never written in this routine
+	jl .redo
+	int 0x99 ; from the fourth iteration on, also raise interrupt 0x99
+	jmp .redo