--- /dev/null
+#include <stdint.h>
+#include <stddef.h>
+
+#include "kernel.h"
+#include "mem.h"
+#include "process.h"
+
+#include "timer.h"
+#include "interrupt.h"
+#include <spinlock.h>
+
+#include <syscall.h>
+#include <cpu.h>
+#include <tty.h>
+
+/* 8 MB stack */
+#define RLIMIT_STACK_VAL 0x800000
+
+/*
+ * memory map for processes
+ * 0xFFFF FFFF F800 0000 and up, kernel, can copy kernel pml4 entries
+ * program load at 4 MB
+ * kernel stack at 128 TB 7F...
+ * stack at 127 TB = 8000 0000 0000 (and down)/ thread stacks go somewhere
+ * stacks could be limited to physmem. perhaps it's ok for each address space
+ * to have its own total stacks limit
+ * mmaps at 126 TB = 7E00 0000 0000 (and down)
+ * heap at 96 TB = 6000 0000 0000 (and up)
+ * heap at 2 TB?
+ * heap after program load...
+ * program at 4 MB = 40 0000
+ */
+
+/* so, to create a process,
+ * allocate 2 MB for stack,
+ * allocate whatever is needed for the code
+ * load in the code to the code base
+ * set up the registers
+ * set up the stack for the iretq or sysret
+ * iretq or sysret to start the task
+ */
+
+/* current process is per processor? */
+struct process *current_task = 0;	/* task running on this cpu */
+static struct process *available = 0; /* a stack of unused task structs */
+/* NOTE(review): nothing in this file ever pushes onto 'available' yet */
+static struct process mainTask;		/* bootstrap task: the init_tasking() caller, pid 0 */
+static struct process *idle_task;	/* fallback task; never enqueued (TASK_NOSCHED) */
+/* Idle loop: halt until the next interrupt, then offer to reschedule. */
+static void idle_task_main() {
+	while (1) {
+		halt();
+		schedule();
+	}
+}
+
+/*
+ * Doubly linked task queue.  'next' links are NULL-terminated; each
+ * node's 'prev' points at the preceding node, and head->prev caches the
+ * tail so appending is O(1).
+ * NOTE(review): the 'tail' field is never read or written anywhere in
+ * this file -- head->prev serves that role; confirm before using q->tail.
+ */
+struct pqueue {
+	struct process *head;
+	struct process *tail;
+	struct spinlock_t lock;
+	unsigned int size;	/* number of tasks currently queued */
+};
+
+static struct pqueue sleepqueue;	/* sleeping tasks, sorted by wakeup tick */
+static struct pqueue runqueue;		/* runnable tasks, FIFO */
+static struct pqueue terminating;	/* tasks waiting to be reaped */
+
+/* Reset a task queue to the empty state with an unlocked spinlock. */
+void initqueue(struct pqueue *q) {
+	q->head = q->tail = 0;
+	q->size = 0;
+	q->lock = (struct spinlock_t){0};
+}
+
+/*
+ * Debug dump of a task queue: prints the current tick, the queue name
+ * (or its address when name is NULL), then each queued task's pid, with
+ * ':wakeup-tick' appended for sleeping tasks.
+ *
+ * Fix: timer_ticks was printed with %u here but with %llu everywhere
+ * else in this file -- a format/argument size mismatch.
+ *
+ * NOTE(review): walks q->head without taking q->lock; tolerable for a
+ * debug aid on one cpu, but the list can change underneath us.
+ */
+void dumpqueue(struct pqueue *q, char *name) {
+	struct process *task;
+
+	if (name) {
+		printk("%llu %s:", timer_ticks, name);
+	} else {
+		printk("%llu queue %llx:", timer_ticks, q);
+	}
+
+	for (task = q->head; task; task = task->next) {
+		printk(" %u", task->pid);
+		if (task->sleep) {
+			printk(":%u", task->sleep);
+		}
+	}
+	printk("\n");
+}
+
+/* dequeue sleeper is no different than a regular dequeue */
+/*
+ * Insert a task into the sleep queue, kept sorted by ascending wakeup
+ * tick (task->sleep) so the timer handler only has to examine the head.
+ * Same list shape as enqueue(): NULL-terminated 'next' links, with
+ * head->prev caching the tail.
+ */
+void enqueue_sleeper(struct pqueue *q, struct process *task) {
+	struct process *sleeper;
+
+	spinlock_acquire(&q->lock);
+
+	task->next = 0;
+	task->prev = 0;
+
+	if (!q->head) {
+		/* empty queue: task is both head and tail */
+		q->head = task;
+		task->prev = task;
+	} else {
+		for (sleeper = q->head; sleeper; sleeper = sleeper->next) {
+			if (task->sleep < sleeper->sleep) {
+				/* insert before this one */
+				task->next = sleeper;
+				task->prev = sleeper->prev;
+				if (sleeper != q->head) {
+					sleeper->prev->next = task;
+				} else {
+					/* becoming the new head; prev already holds the tail */
+					q->head = task;
+				}
+				sleeper->prev = task;
+				break;
+			}
+		}
+
+		if (!sleeper) {
+			/* if we got here, we're the last task */
+			task->next = 0;
+			task->prev = q->head->prev;
+			task->prev->next = task;
+			q->head->prev = task;
+		}
+	}
+
+	q->size++;
+
+	spinlock_release(&q->lock);
+}
+
+/*
+ * Append a task to the tail of a queue (FIFO order).  head->prev caches
+ * the tail, so the append is O(1) without touching the unused q->tail.
+ */
+void enqueue(struct pqueue *q, struct process *task) {
+	spinlock_acquire(&q->lock);
+
+	task->next = 0;
+	if (q->head) {
+		/* link after the current tail (head->prev) */
+		task->prev = q->head->prev;
+		task->prev->next = task;
+		q->head->prev = task;
+	} else {
+		/* empty queue: task is both head and tail */
+		q->head = task;
+		task->prev = task;
+	}
+	q->size++;
+
+	spinlock_release(&q->lock);
+}
+
+/*
+ * Pop and return the task at the head of a queue, or NULL when the
+ * queue is empty.  When a new head remains it inherits the old head's
+ * prev pointer (the cached tail).
+ *
+ * Fix: decrement q->size only when a task was actually removed.  The
+ * old code decremented unconditionally, so every dequeue on an empty
+ * queue (e.g. do_schedule() draining an idle runqueue) underflowed the
+ * unsigned size counter.
+ */
+struct process *dequeue(struct pqueue *q) {
+	struct process *task;
+
+	spinlock_acquire(&q->lock);
+	task = q->head;
+	if (task) {
+		q->head = task->next;
+		if (q->head) {
+			q->head->prev = task->prev;
+		}
+		task->next = 0;
+		task->prev = 0;
+		q->size--;
+	}
+	spinlock_release(&q->lock);
+	return task;
+}
+
+/* put a task at the tail of the run queue */
+void enqueue_runnable(struct process *task) {
+	enqueue(&runqueue, task);
+}
+
+/* take the next runnable task, or NULL when the runqueue is empty */
+struct process *dequeue_runnable(void) {
+	return dequeue(&runqueue);
+}
+
+/* pid of the task currently running on this cpu */
+pid_t getpid() {
+	return current_task->pid;
+}
+
+/* Record the exit status and give up the cpu.
+ * NOTE(review): incomplete -- the task is neither removed from the
+ * scheduler nor marked un-runnable, so it can be scheduled again; the
+ * parent-notification queue for wait() is still a TODO. */
+void exit(int status) {
+	current_task->status = status;
+
+	/* put in parents process "exited" queue for wait() */
+	schedule();
+}
+
+/* Reap a task.
+ * NOTE(review): mostly a stub -- it only appends the task to the
+ * 'terminating' queue and requests a reschedule; the task is not
+ * removed from whatever queue it is still on, no memory is freed, and
+ * the struct is never pushed onto 'available' (see TODOs below). */
+void terminate(struct process *p) {
+
+	/* remove it from whatever queue it's in */
+	/* enqueue it into terminating queue */
+
+	/* push it on the top of the runqueue */
+	enqueue(&terminating, p);
+	/* free all memory, except maybe kernel stack */
+
+	/* remove from task queues */
+
+	/* add to available */
+	schedule();
+}
+
+void yield(void);
+
+/*
+ * Block the current task for at least 'ticks' timer ticks.  Stores the
+ * absolute wakeup tick in task->sleep (nonzero doubles as the "is
+ * sleeping" flag), enqueues on the sorted sleep queue, clears
+ * TM_RUNNABLE, and yields; preemption_interrupt() wakes the task once
+ * timer_ticks reaches the deadline.  Panics on nested sleep.
+ * NOTE(review): if timer_ticks + ticks wraps to exactly 0 the sleeping
+ * flag is lost -- confirm this is acceptable for the tick width used.
+ */
+void sleep(uint32_t ticks) {
+	if (current_task->sleep) {
+		panic("tried to sleep pid %u which was already sleeping until %u\n", current_task->pid, current_task->sleep);
+	}
+	current_task->sleep = timer_ticks + ticks;
+	enqueue_sleeper(&sleepqueue, current_task);
+	current_task->flags &= ~TM_RUNNABLE;
+	yield();
+}
+
+/* demo task: periodic hello message, period staggered by pid */
+static void third_main() {
+	sleep(500);
+	while (1) {
+		printk("%llu: Hello from A pid %lu, cpl = %x\n", timer_ticks, getpid(), current_task->pl);
+		sleep(300+300 * getpid()); /* vary the period so tasks interleave */
+	}
+}
+
+/* demo task: slow periodic hello message */
+static void other_main() {
+	while (1) {
+		printk("%llu: Hello from B pid %lu\n", timer_ticks, getpid());
+		sleep(10000);
+	}
+}
+
+#define HOG
+#ifdef HOG
+/* demo task: burns cpu forever, printing roughly every 1000 ticks.
+ * NOTE(review): 'x' and 'donetil' are static, so multiple hog tasks
+ * would share them -- fine while only one hog is created. */
+static void hog_main() {
+	static int x = 0;
+	static volatile uint64_t donetil = 0;
+	printk("first scheduled hog\n");
+	while (1) {
+		x++;
+		if (timer_ticks >= donetil) {
+			printk("%llu hog %u running\n", timer_ticks,current_task->pid);
+			donetil += 1000; /* may lag timer_ticks if the hog was starved */
+		}
+	}
+}
+#endif
+
+struct interrupt_handler pih;	/* timer (IRQ0) handler registration */
+
+/*
+ * Timer tick handler: wakes any sleepers whose deadline has passed,
+ * charges the current task one quantum tick, and requests a reschedule
+ * when the quantum is exhausted (the idle task reschedules every tick).
+ *
+ * NOTE(review): peeks sleepqueue.head without holding sleepqueue.lock;
+ * dequeue() re-checks under the lock, but the unlocked peek assumes a
+ * single cpu -- confirm.
+ * NOTE(review): mainTask.quantum starts at 0 (static storage), so its
+ * first decrement steps past zero and the '== 0' test below won't fire
+ * again for a full wrap of the counter -- confirm the boot task is
+ * meant to be effectively unpreemptible.
+ */
+static void preemption_interrupt(struct interrupt_context *c, void *n) {
+	struct process *sleeper;
+
+	/* check for sleepers */
+	while (sleepqueue.head && sleepqueue.head->sleep <= timer_ticks) {
+		sleeper = dequeue(&sleepqueue);
+		sleeper->sleep = 0;
+		sleeper->flags |= TM_RUNNABLE;
+		enqueue_runnable(sleeper);
+	}
+
+	if (current_task != idle_task) {
+		current_task->quantum--;
+	} else {
+		schedule();
+	}
+
+	if (current_task->quantum == 0) {
+		if (current_task != idle_task) {
+			if (current_task->flags & TM_RUNNABLE) {
+				/* still runnable: rotate to the back of the runqueue */
+				enqueue_runnable(current_task);
+			}
+		}
+		schedule();
+	}
+	return;
+}
+
+void usermain(void);
+
+/*
+ * Bring up the tasking subsystem: initialize the queues, adopt the
+ * caller as task 0 (mainTask), create the idle task plus several demo
+ * tasks and one user task, and hook the scheduler tick onto IRQ0.
+ */
+void init_tasking() {
+	struct process *task;
+
+	initqueue(&runqueue);
+	initqueue(&sleepqueue);
+	initqueue(&terminating);
+
+	/* turn the currently running context into task 0 */
+	mainTask.reg.cr3 = getcr3();
+	mainTask.reg.rflags = getrflags();
+	mainTask.reg.cs = 0x10;
+	mainTask.reg.ss = 0x18;
+	mainTask.pl = 0x0;
+	mainTask.pid = 0;
+	mainTask.flags = 0;
+
+	/* idle task is never queued; do_schedule() falls back to it */
+	idle_task = new_task(idle_task_main, mainTask.reg.rflags, MEM_KERNEL, TASK_KERNEL|TASK_NOSCHED);
+
+	/* TODO move these into a dummy/test function */
+	task = new_task(other_main, mainTask.reg.rflags, MEM_KERNEL, TASK_KERNEL);
+	task = new_task(third_main, mainTask.reg.rflags, MEM_KERNEL, TASK_KERNEL);
+	task = new_task(third_main, mainTask.reg.rflags, MEM_KERNEL, TASK_KERNEL);
+	task = new_task(third_main, mainTask.reg.rflags, MEM_KERNEL, TASK_KERNEL);
+#ifdef HOG
+	/* NOTE(review): passes mainTask.reg.cr3 where the tasks above pass
+	 * MEM_KERNEL -- confirm whether that difference is intentional */
+	task = new_task(hog_main, mainTask.reg.rflags, mainTask.reg.cr3, TASK_KERNEL);
+#endif
+
+	/* user task: gets its own address space and goes via setup_usertask */
+	task = new_task(usermain, mainTask.reg.rflags, create_addrspace(), 0);
+	current_task = &mainTask;
+	pih.handler = preemption_interrupt;
+	pih.context = 0;
+	interrupt_add_handler(IRQ0, &pih);
+	printk("set up tasking\n");
+}
+
+/* Allocate a fresh process struct and initialize it via create_task().
+ * Panics (does not return) when kernel object allocation fails. */
+struct process *new_task(void (*main)(), uint64_t rflags, uint64_t pagedir, uint64_t flags) {
+	struct process *task = koalloc(sizeof *task);
+
+	if (!task) {
+		panic("can't allocate memory for new task\n");
+		return 0; /* not reached; keeps the compiler happy */
+	}
+
+	create_task(task, main, rflags, pagedir, flags);
+	return task;
+}
+
+void setup_usertask(void);
+
+static pid_t next_pid = 2;
+
+/*
+ * Initialize *task as a new schedulable kernel-mode context.
+ * main      - entry point (user tasks reach it later via setup_usertask)
+ * rflags    - initial RFLAGS for the task
+ * addrspace - CR3 / top-level page table for the task
+ * flags     - TASK_KERNEL and/or TASK_NOSCHED
+ * Returns the new task's pid.  Unless TASK_NOSCHED is set, the task is
+ * immediately placed on the runqueue.
+ */
+pid_t create_task(struct process *task, void (*main)(), uint64_t rflags, uint64_t addrspace, uint64_t flags) {
+
+	task->reg = (struct registers){ 0 }; /* clear out all registers for new task */
+
+	/* roll over? re-use? */
+	/* could possibly use bts on max nums. possibly wasteful, and still O(n) */
+	/* Fix: always assign a fresh pid.  Previously the assignment was
+	 * skipped whenever 'available' was non-empty, which would have left
+	 * task->pid holding a stale value; nothing pushes onto 'available'
+	 * yet, so this is behavior-identical today. */
+	task->pid = next_pid++;
+
+	task->reg.rflags = rflags;
+	task->reg.cr3 = addrspace;
+	task->kstack = (uint64_t)PHY2VIRTP(palloc()); /* new kernel stack */
+	task->reg.rsp = task->kstack + 0x1000; /* one 4 KB page; stack grows down */
+	task->reg.rip = (uint64_t)main;
+	task->reg.cs = 0x10; /* kernel code segment */
+	task->reg.ss = 0x18; /* kernel data segment */
+	task->pl = 0; /* not a user task */
+	task->quantum = 0; /* will get a quantum when it's scheduled */
+	task->flags = TM_RUNNABLE; /* new tasks are runnable */
+
+	if (! (flags & TASK_KERNEL)) {
+		/* user tasks start in setup_usertask, which builds the user
+		 * mappings and drops to ring 3; the real entry point travels
+		 * in rdi. */
+		task->main = (int (*)(int,char**))main;
+		task->reg.rdi = (uintptr_t)task->main;
+		task->reg.rip = (uintptr_t)setup_usertask;
+		task->flags |= TM_USER;
+	}
+
+	/* stacks will need thinking */
+	/* stack can't just go at fixed 128 TB, threads need their own */
+	if (! (flags & TASK_KERNEL)) {
+		printk("created user task %u %llx, space = %llx, entry = %llx\n", task->pid, task, task->reg.cr3, task->reg.rip);
+	}
+
+	if (!(flags & TASK_NOSCHED)) {
+		enqueue_runnable(task);
+	}
+
+	return task->pid;
+}
+
+
+/*
+ * Kernel-mode prologue of every user task (create_task points rip here
+ * and passes the real entry point via task->main).  Maps user memory at
+ * the 8 GB mark, copies the task's code there, builds a user stack just
+ * below TB(127), then drops to ring 3 via usermodetrampoline.  Never
+ * returns.
+ * NOTE(review): copies a fixed KB(4) of code and assumes it is position
+ * independent -- confirm both limits.
+ */
+void setup_usertask(void) {
+	struct process *task;
+	task = current_task;
+
+	/* need a sysret trampoline for user tasks */
+	//task->reg.rip = (uint64_t)usermodetrampoline;
+	//task->reg.rcx = (uint64_t)main;
+	// hmm. if this is in kernel space, it's going to fail after the sysret
+	// we need to copy the code into the user space, and hope it's PIC...
+
+	/* create a 1 meg map for the user code at the 8 GB mark */
+	printk("setting up usertask %u, space = %llx\n", task->pid, getcr3());
+	vmapz(task->reg.cr3, GB(8), MB(1), MEM_USERSPACE|MEM_RW);
+	printk("mapped 1 MB for user\n");
+	memmove((void *)GB(8), task->main, KB(4));
+	printk("copied main func from %llx to %llx\n", task->main, GB(8));
+	/* copy the user main function to the GB 8 mark */
+	/* hmm, not mapped here..., so have to map it in the trampoline
+	 * so it has the user mappings
+	 */
+	/* map it into kernel space? will need a mutex on that */
+	/* then just copy the tables? */
+
+	task->reg.rip = GB(8);
+	task->pl = 3; /* user privilege level from here on */
+
+#if 0
+	task->reg.cs = 0x20;
+	task->reg.ss = 0x28;
+#endif
+
+	/* create the user mode stack */
+	task->stacks = TB(127);
+	vmapz(task->reg.cr3, task->stacks - MB(2), MB(2), MEM_USERSPACE);
+	task->usp = task->stacks; /* user stack pointer */
+
+	/* What if this fails? */
+	/* need a kernel mode stack for syscalls, and possibly interrupts */
+#if 0
+	task->kstack = TB(128);
+	printk("creating 4 KB kernel stack at virtual address %llx\n", task->kstack-KB(4));
+	vmapz(addrspace, task->kstack - KB(4), KB(4), MEM_RW|MAP_TRACE);
+	task->reg.rsp = task->kstack;
+#endif
+	printk("user task %u, kstack rsp = %llx\n", task->pid, task->kstack);
+	//print_decode(0, task->reg.cr3, task->reg.rsp-8);
+#if 0
+	test_address((void *)(task->reg.rsp - 8));
+	allstop();
+#endif
+	/* this won't return */
+	printk("user tramp\n");
+	/* we loaded to the 8 GB mark... */
+	usermodetrampoline( (void (*)()) GB(8), task->reg.rflags);
+}
+
+/* Request a reschedule by flagging the current task; the actual switch
+ * is deferred to do_schedule(), presumably run once need_schedule() is
+ * observed (caller of do_schedule is not visible in this file). */
+void schedule(void) {
+	current_task->flags |= TM_SCHEDULE;
+}
+
+/* set the current task for scheduling, and if we're not in an interrupt,
+ * immediately re-schedule. could just halt for now, which will wait for the
+ * next interrupt.
+ */
+void yield(void) {
+	schedule();
+	/* TODO check if in interrupt. if not, then call one? */
+	halt(); /* wait for the next interrupt to perform the switch */
+}
+
+/* nonzero when the current task has been flagged for a reschedule */
+int need_schedule(void) {
+	if (!current_task) {
+		return 0;
+	}
+	return current_task->flags & TM_SCHEDULE;
+}
+
+/* probably want to use lock free queue with compare and swap
+ * see http://preshing.com/20120612/an-introduction-to-lock-free-programming/
+ */
+
+/*
+ * Pick and switch to the next task: takes the head of the runqueue,
+ * falling back to the idle task when nothing is runnable.  The actual
+ * context switch (register state in prev->reg/next->reg) is done by
+ * switch_task().
+ */
+void do_schedule(void) {
+	struct process *prev, *next;
+
+	prev = current_task;
+
+	next = dequeue_runnable();
+	if (!next) {
+		/* no runnable task */
+		next = idle_task;
+	}
+
+	next->flags &= ~TM_SCHEDULE; /* clear the schedule flag */
+	next->quantum = 10; /* smaller slice for same task */
+
+	/* don't run through the trouble if we're not changing tasks */
+	if (next != prev) {
+		next->quantum = 20; /* full slice when actually switching */
+		dumpqueue(&runqueue, "runqueue"); /* debug: show who's waiting */
+		current_task = next;
+		switch_task(&prev->reg, &next->reg);
+	}
+}
+
+/* Preemption entry point: currently just requests a reschedule. */
+void preempt() {
+	schedule();
+}