2 次代码提交 64a2f98d10 ... ba065d626d

作者 SHA1 备注 提交日期
  coderain ba065d626d Bugfixes. Starting work on sections. 5 年之前
  coderain 7001b53236 Start implementing address space management 5 年之前
共有 5 个文件被更改,包括 620 次插入和 45 次删除
  1. 47 4
      kernel/include/new_memory.h
  2. 11 1
      kernel/src/memory/main.c
  3. 202 18
      kernel/src/memory/map.c
  4. 16 22
      kernel/src/memory/physical.c
  5. 344 0
      kernel/src/memory/virtual.c

+ 47 - 4
kernel/include/new_memory.h

@@ -25,6 +25,8 @@
 #include <sdk/memory.h>
 #include <sdk/avltree.h>
 #include <exception.h>
+#include <object.h>
+#include <filesystem.h>
 
 #define INVALID_PAGE              ((physical_t)-1)
 #define PAGE_SIZE                 0x1000
@@ -32,12 +34,18 @@
 #define MIN_PHYS_ADDR_BITS        16
 #define MAX_PHYS_ADDR_BITS        52
 #define MEMORY_MAX_BLOCKS         0x80000
+#define MEMORY_METADATA_TOP       0xFF000000
 
+/* Set by the client */
 #define MEMORY_FLAG_ACCESSIBLE    (1 << 0)
 #define MEMORY_FLAG_WRITABLE      (1 << 1)
 #define MEMORY_FLAG_EXECUTABLE    (1 << 2)
 #define MEMORY_FLAG_USERMODE      (1 << 3)
-#define MEMORY_FLAG_EVICTABLE     (1 << 29)
+#define MEMORY_FLAG_STICKY        (1 << 4)
+#define MEMORY_FLAG_EVICTABLE     (1 << 5)
+
+/* Set by the system */
+#define MEMORY_FLAG_EVICTED       (1 << 29)
 #define MEMORY_FLAG_COPY_ON_WRITE (1 << 30)
 #define MEMORY_FLAG_FREE          (1 << 31)
 
@@ -83,6 +91,26 @@ typedef struct
     page_status_t status;
 } memory_map_entry_t;
 
+/* Identifies what kind of backing a memory section has (stored in memory_section_t.backing). */
+typedef enum
+{
+    MEMORY_ZERO_BACKED_SECTION,
+    MEMORY_FILE_BACKED_SECTION,
+    MEMORY_PHYSICAL_BACKED_SECTION,
+} memory_section_backing_t;
+
+/*
+ * A memory section: an object (it starts with an object_t header) that
+ * records its backing kind and its length in pages.
+ * NOTE(review): `backing` presumably decides which member of the anonymous
+ * union is meaningful -- confirm against the code that creates sections.
+ */
+typedef struct
+{
+    object_t header;
+    memory_section_backing_t backing;
+    page_num_t num_pages;
+
+    /* Backing-specific data; the two members share storage. */
+    union
+    {
+        page_t *pages;
+        file_instance_t *file; /* strong reference */
+    };
+} memory_section_t;
+
 typedef struct
 {
     avl_node_t by_addr_node;
@@ -90,6 +118,8 @@ typedef struct
     uintptr_t address;
     size_t size;
     memory_flags_t flags;
+    memory_section_t *section;
+    page_num_t section_offset;
 } memory_block_t;
 
 typedef struct
@@ -103,14 +133,17 @@ typedef struct
 } address_space_t;
 
 extern uintptr_t memory_metadata_base;
-extern page_table_t memory_default_table;
+extern const page_table_t memory_default_table;
+extern const page_table_t memory_shadow_table;
+extern address_space_t *memory_lower_space;
+extern address_space_t *const memory_upper_space;
 
 void *memory_request_metadata_space(size_t count, size_t size);
 
 page_t *memory_acquire_page(byte_t min_bits, byte_t max_bits, size_t alignment);
 void memory_acquire_area(byte_t min_bits, byte_t max_bits, size_t size, size_t alignment, area_t *area);
 void memory_release_page(page_t *page);
-void memory_release_area(area_t *area);
+void memory_release_area(const area_t *area);
 page_t *memory_find_page_by_address(physical_t address);
 void memory_claim_physical_region(physical_t address, qword_t size, page_status_t initial_status);
 void memory_abandon_physical_region(physical_t address, qword_t size);
@@ -118,10 +151,20 @@ void memory_init_physical(memory_map_entry_t *mmap, size_t entry_count);
 
 page_t *memory_get_page_mapping(page_table_t table, void *address);
 sysret_t memory_map_page(page_table_t table, page_t *page, void *address, memory_flags_t access_flags);
-sysret_t memory_query_access_flags(page_table_t table, void *address, memory_flags_t *access_flags);
+sysret_t memory_map_area(page_table_t table, const area_t *area, void *address, memory_flags_t access_flags);
+sysret_t memory_query_page_flags(page_table_t table, void *address, memory_flags_t *access_flags);
 sysret_t memory_adjust_page_flags(page_table_t table, void *address, memory_flags_t access_flags);
 sysret_t memory_unmap_clear_page(page_table_t table, void *address);
 sysret_t memory_unmap_keep_page(page_table_t table, void *address);
+sysret_t memory_load_default_table(page_t *new_default_table);
+sysret_t memory_load_shadow_table(page_t *new_shadow_table);
+sysret_t memory_unload_shadow_table(void);
+page_t *memory_create_page_table(void);
 void memory_init_mapping(void);
 
+memory_block_t *memory_get_block_for_address(void *address);
+sysret_t memory_allocate(address_space_t *space, void **address, size_t size, memory_flags_t flags, memory_section_t *section, page_num_t section_offset);
+sysret_t memory_free(address_space_t *space, void *address);
+void memory_init_virtual(const area_t *kernel_area);
+
 #endif

+ 11 - 1
kernel/src/memory/main.c

@@ -27,7 +27,7 @@ extern void memory_init_mapping_hack(void);
 extern void memory_init_physical(memory_map_entry_t *mmap, size_t num_entries);
 extern void (*_end)();
 
-uintptr_t memory_metadata_base = 0xFF000000;
+uintptr_t memory_metadata_base = MEMORY_METADATA_TOP;
 
 static int compare_map_entry(const void *p1, const void *p2)
 {
@@ -166,7 +166,9 @@ void *memory_request_metadata_space(size_t count, size_t size)
 
 void new_memory_init(uintptr_t mboot_tags, size_t mboot_size)
 {
+    cpu_max_physical_bits = 32; // No PAE for now
     memory_init_mapping_hack();
+
     size_t num_entries, max_entries = 1024;
 
     for (;;)
@@ -181,4 +183,12 @@ void new_memory_init(uintptr_t mboot_tags, size_t mboot_size)
 
         max_entries += max_entries;
     }
+
+    area_t kernel_area = {
+        .pages = memory_find_page_by_address(MULTIBOOT_LOAD_ADDRESS),
+        .count = PAGE_NUMBER(PAGE_ALIGN_UP((uintptr_t)&_end - VIRTUAL_LOAD_ADDRESS)),
+    };
+
+    ASSERT(kernel_area.pages != NULL);
+    memory_init_virtual(&kernel_area);
 }

+ 202 - 18
kernel/src/memory/map.c

@@ -45,13 +45,14 @@ typedef union
         bool_t no_cache : 1;
         bool_t accessed : 1;
         bool_t dirty : 1;
-        bool_t reserved1 : 1;
+        bool_t large : 1;
         bool_t global : 1;
         bool_t cow : 1;
-        size_t reserved2 : 2;
+        bool_t sticky : 1;
+        size_t reserved1 : 1;
         page_num_t number : 40;
-        size_t reserved3 : 11;
-        bool_t no_execute : 1;
+        size_t reserved2 : 11;  /* PAE only */
+        bool_t no_execute : 1;  /* PAE only */
     } present;
 
     struct
@@ -62,9 +63,11 @@ typedef union
         bool_t writable : 1;
         bool_t executable : 1;
         bool_t usermode : 1;
-        size_t reserved1 : 4;
+        bool_t sticky : 1;
+        bool_t cow : 1;
+        size_t reserved1 : 2;
         page_num_t number : 40;
-        size_t reserved2 : 12;
+        size_t reserved2 : 12;  /* PAE only */
     } absent;
 } pte_t;
 
@@ -77,8 +80,10 @@ static struct
 // LMA:  { {39, 9}, {30, 9}, {21, 9}, {12, 9} }
 // VA57: { {48, 9}. {39, 9}, {30, 9}, {21, 9}, {12, 9} }
 
-page_table_t memory_default_table = (page_table_t)0xFFFFFFFC;
-byte_t paging_levels = 2;
+const page_table_t memory_default_table = (page_table_t)0xFFFFFFFC;
+const page_table_t memory_shadow_table = (page_table_t)0xFFFFFFF8;
+size_t memory_table_size = 0x400000;
+byte_t paging_levels = 2, self_entry_level = 0;
 byte_t table_entry_size = 4;
 
 static inline pte_t read_pte(pte_pointer_t ppte)
@@ -113,8 +118,7 @@ static pte_pointer_t memory_get_table_entry(page_table_t table, void *address, b
     uintptr_t numeric_address = (uintptr_t)address;
     uintptr_t table_base = (uintptr_t)table;
 
-    int level;
-    for (level = 0; level < paging_levels; level++)
+    for (int level = 0; level < paging_levels; level++)
     {
         uintptr_t level_mask = ((uintptr_t)1 << page_table_levels[level].bits) - 1;
         uintptr_t table_mask = (level_mask + 1) * table_entry_size - 1;
@@ -157,13 +161,91 @@ static sysret_t get_or_create_table_entry(page_table_t table, void *address, pte
     return ERR_SUCCESS;
 }
 
+/*
+ * Walks `count` entries of the table at `source` and mirrors them into the
+ * table at `destination`, one paging level at a time.
+ *
+ * For every present entry:
+ *  - if its sticky bit is set, the raw entry is copied to the destination
+ *    slot; beforehand, if the destination slot has no page mapping in
+ *    memory_shadow_table, a fresh page is acquired and mapped there (the
+ *    kernel crashes if either step fails);
+ *  - otherwise, if a deeper paging level exists, the function recurses into
+ *    the nested source/destination tables derived from the entry pointers.
+ *
+ * At level 0 only half of the entries are visited, and at self_entry_level
+ * the last two entries are skipped.
+ * NOTE(review): the level-0 halving presumably restricts the walk to one half
+ * of the address space -- confirm which half is intended.
+ *
+ * The loop is generated by a macro so that it can run over either 4-byte or
+ * 8-byte entries depending on table_entry_size; no other entry size is handled.
+ */
+static void update_sticky_pages(int level, pte_pointer_t source, pte_pointer_t destination)
+{
+    pte_t pte;
+    size_t count = 1 << (page_table_levels[level].bits - (level ? 0 : 1));
+
+    /* We must skip entries that point back into higher-level tables */
+    if (level == self_entry_level) count -= 2;
+
+#define UPDATE_PAGES_LOOP(type)                                         \
+    do                                                                  \
+    {                                                                   \
+        type *src_ppte = (type*)source;                                 \
+        type *dest_ppte = (type*)destination;                           \
+                                                                        \
+        for (size_t i = 0; i < count; i++)                              \
+        {                                                               \
+            pte.raw_entry = *src_ppte;                                  \
+                                                                        \
+            if (pte.is_present)                                         \
+            {                                                           \
+                if (pte.present.sticky)                                 \
+                {                                                       \
+                    if (!memory_get_page_mapping(memory_shadow_table, dest_ppte)) \
+                    {                                                   \
+                        page_t *table_page = memory_acquire_page(MIN_PHYS_ADDR_BITS, MAX_PHYS_ADDR_BITS, PAGE_SIZE); \
+                        if (!table_page) KERNEL_CRASH("No free pages were available at a critical moment"); \
+                                                                        \
+                        sysret_t ret = memory_map_page(memory_shadow_table, \
+                                                       table_page,      \
+                                                       (void*)PAGE_ALIGN((uintptr_t)dest_ppte), \
+                                                       MEMORY_FLAG_ACCESSIBLE | MEMORY_FLAG_WRITABLE); \
+                        if (ret != ERR_SUCCESS) KERNEL_CRASH("Unexpected mapping error"); \
+                    }                                                   \
+                                                                        \
+                    *dest_ppte = pte.raw_entry;                         \
+                }                                                       \
+                else if (level + 1 < paging_levels)                     \
+                {                                                       \
+                    uintptr_t mask = memory_table_size - 1;             \
+                    int shift = page_table_levels[level + 1].bits;      \
+                    pte_pointer_t nested_src = (pte_pointer_t)(((uintptr_t)src_ppte & ~mask) | (((uintptr_t)src_ppte << shift) & mask)); \
+                    pte_pointer_t nested_dest = (pte_pointer_t)(((uintptr_t)dest_ppte & ~mask) | (((uintptr_t)dest_ppte << shift) & mask)); \
+                    update_sticky_pages(level + 1, nested_src, nested_dest); \
+                }                                                       \
+            }                                                           \
+                                                                        \
+            src_ppte++;                                                 \
+            dest_ppte++;                                                \
+        }                                                               \
+    } while(FALSE)
+
+    if (table_entry_size == 4) UPDATE_PAGES_LOOP(dword_t);
+    else if (table_entry_size == 8) UPDATE_PAGES_LOOP(qword_t);
+
+#undef UPDATE_PAGES_LOOP
+}
+
 page_t *memory_get_page_mapping(page_table_t table, void *address)
 {
     pte_pointer_t ppte = memory_get_table_entry(table, address, FALSE);
     if (!ppte) return NULL;
 
     pte_t pte = read_pte(ppte);
-    return pte.is_present ? memory_find_page_by_address(pte.present.number * PAGE_SIZE) : NULL;
+
+    if (pte.is_present)
+    {
+        return memory_find_page_by_address(pte.present.number * PAGE_SIZE);
+    }
+    else
+    {
+        switch (pte.absent.type)
+        {
+        case PTE_COMMITTED:
+            return memory_find_page_by_address(pte.absent.number * PAGE_SIZE);
+
+        case PTE_BLANK:
+        case PTE_RESERVED:
+        case PTE_EVICTED:
+        case PTE_TRANSITIONAL:
+            return NULL;
+
+        default:
+            KERNEL_CRASH("Invalid page type");
+        }
+    }
 }
 
 sysret_t memory_map_page(page_table_t table, page_t *page, void *address, memory_flags_t access_flags)
@@ -183,6 +265,7 @@ sysret_t memory_map_page(page_table_t table, page_t *page, void *address, memory
         new_pte.is_present = TRUE;
         new_pte.present.writable = (access_flags & MEMORY_FLAG_WRITABLE) ? TRUE : FALSE;
         new_pte.present.usermode = (access_flags & MEMORY_FLAG_USERMODE) ? TRUE : FALSE;
+        new_pte.present.sticky = (access_flags & MEMORY_FLAG_STICKY) ? TRUE : FALSE;
         new_pte.present.number = page->number;
         new_pte.present.no_execute = (access_flags & MEMORY_FLAG_EXECUTABLE) ? FALSE : TRUE;
 
@@ -196,6 +279,7 @@ sysret_t memory_map_page(page_table_t table, page_t *page, void *address, memory
         new_pte.absent.writable = (access_flags & MEMORY_FLAG_WRITABLE) ? TRUE : FALSE;
         new_pte.absent.executable = (access_flags & MEMORY_FLAG_EXECUTABLE) ? TRUE : FALSE;
         new_pte.absent.usermode = (access_flags & MEMORY_FLAG_USERMODE) ? TRUE : FALSE;
+        new_pte.absent.sticky = (access_flags & MEMORY_FLAG_STICKY) ? TRUE : FALSE;
         new_pte.absent.number = page->number;
     }
 
@@ -211,7 +295,32 @@ sysret_t memory_map_page(page_table_t table, page_t *page, void *address, memory
     }
 }
 
-sysret_t memory_query_access_flags(page_table_t table, void *address, memory_flags_t *access_flags)
+/*
+ * Maps every page of `area` into `table`, placing page N of the area at the
+ * page-aligned form of `address` plus N pages, all with `access_flags`.
+ *
+ * Returns ERR_SUCCESS when all pages were mapped. If mapping any page fails,
+ * the pages mapped so far by this call are unmapped again and the error
+ * reported by memory_map_page is returned.
+ */
+sysret_t memory_map_area(page_table_t table, const area_t *area, void *address, memory_flags_t access_flags)
+{
+    sysret_t ret = ERR_SUCCESS;
+    uintptr_t numeric_address = PAGE_ALIGN((uintptr_t)address);
+    page_num_t page;
+
+    for (page = 0; page < area->count; page++)
+    {
+        ret = memory_map_page(table,
+                              &area->pages[page],
+                              (void*)(numeric_address + (size_t)page * PAGE_SIZE),
+                              access_flags);
+        if (ret != ERR_SUCCESS) break;
+    }
+
+    if (ret != ERR_SUCCESS)
+    {
+        /*
+         * Roll back. `page` is the index of the page that failed, so only
+         * the indices below it were mapped; each of them must be addressed
+         * through the loop counter, not through `page` itself.
+         */
+        for (page_num_t i = 0; i < page; i++)
+        {
+            memory_unmap_clear_page(table, (void*)(numeric_address + (size_t)i * PAGE_SIZE));
+        }
+    }
+
+    return ret;
+}
+
+sysret_t memory_query_page_flags(page_table_t table, void *address, memory_flags_t *access_flags)
 {
     pte_pointer_t ppte = memory_get_table_entry(table, address, FALSE);
     if (!ppte) return ERR_BADPTR;
@@ -225,6 +334,8 @@ sysret_t memory_query_access_flags(page_table_t table, void *address, memory_fla
         if (pte.present.writable) *access_flags |= MEMORY_FLAG_WRITABLE;
         if (!(pte.present.no_execute)) *access_flags |= MEMORY_FLAG_EXECUTABLE;
         if (pte.present.usermode) *access_flags |= MEMORY_FLAG_USERMODE;
+        if (pte.present.sticky) *access_flags |= MEMORY_FLAG_STICKY;
+        if (pte.present.cow) *access_flags |= MEMORY_FLAG_COPY_ON_WRITE;
     }
     else
     {
@@ -233,6 +344,8 @@ sysret_t memory_query_access_flags(page_table_t table, void *address, memory_fla
         if (pte.absent.writable) *access_flags |= MEMORY_FLAG_WRITABLE;
         if (pte.absent.executable) *access_flags |= MEMORY_FLAG_EXECUTABLE;
         if (pte.absent.usermode) *access_flags |= MEMORY_FLAG_USERMODE;
+        if (pte.absent.sticky) *access_flags |= MEMORY_FLAG_STICKY;
+        if (pte.absent.type == PTE_EVICTED || pte.absent.type == PTE_TRANSITIONAL) *access_flags |= MEMORY_FLAG_EVICTED;
     }
 
     return ERR_SUCCESS;
@@ -312,6 +425,7 @@ sysret_t memory_unmap_keep_page(page_table_t table, void *address)
     entry.absent.writable = original_entry.present.writable;
     entry.absent.executable = !original_entry.present.no_execute;
     entry.absent.usermode = original_entry.present.usermode;
+    entry.absent.sticky = original_entry.present.sticky;
     entry.absent.reserved1 = 0;
     entry.absent.number = 0;
     entry.absent.reserved2 = 0;
@@ -323,13 +437,84 @@ sysret_t memory_unmap_keep_page(page_table_t table, void *address)
     return ERR_SUCCESS;
 }
 
+/*
+ * Makes `new_shadow_table` reachable through the shadow-table window.
+ *
+ * The page is first mapped (accessible, writable) at the virtual address
+ * -2 * PAGE_SIZE in the default table, then the entry located at
+ * -PAGE_SIZE - 2 * table_entry_size is atomically replaced with a present,
+ * writable, no-execute entry that points at the same page.
+ *
+ * Returns ERR_INVALID if the page status is below PAGE_STATUS_ALLOCATED,
+ * the memory_map_page error if the mapping fails, ERR_BUSY (after undoing the
+ * mapping) if the compare-exchange loses, and ERR_SUCCESS otherwise.
+ */
+sysret_t memory_load_shadow_table(page_t *new_shadow_table)
+{
+    if (new_shadow_table->status < PAGE_STATUS_ALLOCATED) return ERR_INVALID;
+
+    sysret_t ret = memory_map_page(memory_default_table,
+                                   new_shadow_table,
+                                   (void*)(-2 * PAGE_SIZE),
+                                   MEMORY_FLAG_ACCESSIBLE | MEMORY_FLAG_WRITABLE);
+    if (ret != ERR_SUCCESS) return ret;
+
+    /* NOTE(review): presumably the top-level slot that backs memory_shadow_table -- confirm. */
+    pte_pointer_t self_entry = (pte_pointer_t)(-PAGE_SIZE - table_entry_size * 2);
+    pte_t new_pte = { 0 };
+    pte_t old_pte = read_pte(self_entry);
+    new_pte.is_present = TRUE;
+    new_pte.present.writable = TRUE;
+    new_pte.present.number = new_shadow_table->number;
+    new_pte.present.no_execute = TRUE;
+
+    /* The entry changed between the read and the exchange: back out the mapping. */
+    if (!cmpxchg_pte(self_entry, old_pte, new_pte))
+    {
+        memory_unmap_clear_page(memory_default_table, (void*)(-2 * PAGE_SIZE));
+        return ERR_BUSY;
+    }
+
+    return ERR_SUCCESS;
+}
+
+/*
+ * Removes the mapping at -2 * PAGE_SIZE from the default table (the address
+ * memory_load_shadow_table maps the shadow table page at) and returns the
+ * result of memory_unmap_clear_page.
+ */
+sysret_t memory_unload_shadow_table(void)
+{
+    return memory_unmap_clear_page(memory_default_table, (void*)(-2 * PAGE_SIZE));
+}
+
+/*
+ * Switches the CPU to the page table stored in `new_default_table`.
+ *
+ * Before the switch the new table is loaded as the shadow table and
+ * update_sticky_pages() is run from level 0, with the destination placed
+ * memory_table_size bytes below the source, so that sticky entries are
+ * carried over. The shadow table is then unloaded (asserted to succeed) and
+ * the page's physical address is written to the page table register.
+ *
+ * Returns ERR_INVALID if the page status is below PAGE_STATUS_ALLOCATED, the
+ * error from memory_load_shadow_table if that fails, ERR_SUCCESS otherwise.
+ */
 sysret_t memory_load_default_table(page_t *new_default_table)
 {
     if (new_default_table->status < PAGE_STATUS_ALLOCATED) return ERR_INVALID;
+
+    sysret_t ret = memory_load_shadow_table(new_default_table);
+    if (ret != ERR_SUCCESS) return ret;
+
+    pte_pointer_t source_table = (pte_pointer_t)((intptr_t)-PAGE_SIZE >> 1);
+    pte_pointer_t dest_table = (pte_pointer_t)(((intptr_t)-PAGE_SIZE >> 1) - (intptr_t)memory_table_size);
+    update_sticky_pages(0, source_table, dest_table);
+    ret = memory_unload_shadow_table();
+    ASSERT(ret == ERR_SUCCESS);
+
     cpu_write_page_table_register(new_default_table->number * PAGE_SIZE);
     return ERR_SUCCESS;
 }
 
+/*
+ * Allocates and initialises a new top-level page table.
+ *
+ * A page is acquired, loaded as the shadow table, and the PAGE_SIZE bytes at
+ * -2 * PAGE_SIZE (where the shadow table page is mapped) are zeroed. The
+ * entry at -PAGE_SIZE - table_entry_size is then switched from zero to a
+ * present, writable, no-execute entry pointing at the new page.
+ * NOTE(review): that entry is presumably the new table's self-referencing
+ * slot as seen through the shadow window -- confirm.
+ *
+ * Returns the new page, or NULL if no page could be acquired, the shadow
+ * table could not be loaded, or the compare-exchange failed. The page is
+ * released again on the two failure paths that follow a successful
+ * acquisition.
+ */
+page_t *memory_create_page_table(void)
+{
+    page_t *page = memory_acquire_page(MIN_PHYS_ADDR_BITS, MAX_PHYS_ADDR_BITS, PAGE_SIZE);
+    if (!page) return NULL;
+
+    if (memory_load_shadow_table(page) != ERR_SUCCESS)
+    {
+        memory_release_page(page);
+        return NULL;
+    }
+
+    memset((void*)(-2 * PAGE_SIZE), 0, PAGE_SIZE);
+    pte_t zero = { 0 }, pte = zero;
+    pte.is_present = TRUE;
+    pte.present.writable = TRUE;
+    pte.present.number = page->number;
+    pte.present.no_execute = TRUE;
+
+    if (!cmpxchg_pte((pte_pointer_t)(-PAGE_SIZE - table_entry_size), zero, pte))
+    {
+        memory_unload_shadow_table();
+        memory_release_page(page);
+        return NULL;
+    }
+
+    memory_unload_shadow_table();
+    return page;
+}
+
 void memory_init_mapping_hack(void)
 {
     page_table_t boot_table = (page_table_t)0xC0300C00;
@@ -341,12 +526,11 @@ void memory_init_mapping_hack(void)
         self_referencing_pte = memory_get_table_entry(boot_table, self_referencing_pte, TRUE);
     }
 
-    pte_t zero = { 0 };
-    pte_t pte = zero;
-    pte.is_present = TRUE,
-    pte.present.writable = TRUE,
-    pte.present.number = PAGE_NUMBER(cpu_read_page_table_register()),
-    pte.present.no_execute = TRUE,
+    pte_t zero = { 0 }, pte = zero;
+    pte.is_present = TRUE;
+    pte.present.writable = TRUE;
+    pte.present.number = PAGE_NUMBER(cpu_read_page_table_register());
+    pte.present.no_execute = TRUE;
 
     cmpxchg_pte(self_referencing_pte, zero, pte);
 }

+ 16 - 22
kernel/src/memory/physical.c

@@ -162,7 +162,7 @@ void memory_release_page(page_t *page)
     lock_release(&list->lock);
 }
 
-void memory_release_area(area_t *area)
+void memory_release_area(const area_t *area)
 {
     size_t i;
     for (i = 0; i < area->count; i++) memory_release_page(&area->pages[i]);
@@ -177,8 +177,8 @@ page_t *memory_find_page_by_address(physical_t address)
     page_t *page = &list->all[PAGE_NUMBER(address) - list->first_page_num];
 
     memory_flags_t access_flags;
-    return (memory_query_access_flags(memory_default_table, page, &access_flags)
-            && page->status == PAGE_STATUS_ABANDONED) ? page : NULL;
+    return (memory_query_page_flags(memory_default_table, page, &access_flags) == ERR_SUCCESS
+            && page->status != PAGE_STATUS_ABANDONED) ? page : NULL;
 }
 
 void memory_claim_physical_region(physical_t address, qword_t size, page_status_t initial_status)
@@ -206,7 +206,7 @@ void memory_claim_physical_region(physical_t address, qword_t size, page_status_
             page_t *page = &list->all[number - list->first_page_num];
             memory_flags_t access_flags;
 
-            if (memory_query_access_flags(memory_default_table, page, &access_flags) != ERR_SUCCESS)
+            if (memory_query_page_flags(memory_default_table, page, &access_flags) != ERR_SUCCESS)
             {
                 page_t temporary[MAX_PAGING_LEVELS];
                 page_num_t num_temp = 0;
@@ -262,20 +262,18 @@ void memory_claim_physical_region(physical_t address, qword_t size, page_status_
                     page[i] = temporary[i];
                 }
 
-                number += num_temp + 1;
+                number += num_temp;
             }
-            else
-            {
-                if (page->status == PAGE_STATUS_ABANDONED)
-                {
-                    page->number = number;
-                    page->status = initial_status;
-                    page->map_count = 0;
-                    if (initial_status == PAGE_STATUS_FREE) mini_list_prepend(&list->free_stack, &page->stack_link);
-                }
 
-                number++;
+            if (page->status == PAGE_STATUS_ABANDONED)
+            {
+                page->number = number;
+                page->status = initial_status;
+                page->map_count = 0;
+                if (initial_status == PAGE_STATUS_FREE) mini_list_prepend(&list->free_stack, &page->stack_link);
             }
+
+            number++;
         }
 
         lock_release(&list->lock);
@@ -290,21 +288,17 @@ void memory_abandon_physical_region(physical_t address, qword_t size)
 
 static void memory_init_page_lists(void)
 {
-    size_t pages_covered = 0;
     ASSERT(cpu_max_physical_bits <= MAX_PHYS_ADDR_BITS);
 
-    byte_t bits;
-    for (bits = MIN_PHYS_ADDR_BITS; bits <= cpu_max_physical_bits; bits++)
+    for (byte_t bits = cpu_max_physical_bits; bits >= MIN_PHYS_ADDR_BITS; bits--)
     {
         page_list_t *list = &page_list[bits - MIN_PHYS_ADDR_BITS];
 
-        list->first_page_num = pages_covered;
-        list->num_pages = PAGE_NUMBER(1ULL << bits) - pages_covered;
+        list->first_page_num = bits > MIN_PHYS_ADDR_BITS ? PAGE_NUMBER(1ULL << (bits - 1)) : 0;
+        list->num_pages = PAGE_NUMBER(1ULL << bits) - list->first_page_num;
         list->all = memory_request_metadata_space(list->num_pages, sizeof(page_t));
         mini_list_init(&list->free_stack);
         lock_init(&list->lock);
-
-        pages_covered += list->num_pages;
     }
 }
 

+ 344 - 0
kernel/src/memory/virtual.c

@@ -0,0 +1,344 @@
+/*
+ * memory/virtual.c
+ *
+ * Copyright (C) 2019 Aleksandar Andrejevic <theflash@sdf.lonestar.org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <new_memory.h>
+#include <log.h>
+
+#define SPACE_BLOCK_PAGES PAGE_NUMBER(PAGE_ALIGN_UP(MEMORY_MAX_BLOCKS * sizeof(memory_block_t)))
+#define SPACE_BITMAP_PAGES PAGE_NUMBER(PAGE_ALIGN_UP(MEMORY_MAX_BLOCKS / 8))
+
+static address_space_t kernel_space;
+
+address_space_t *memory_lower_space = NULL;
+address_space_t *const memory_upper_space = &kernel_space;
+memory_block_t *user_memory_blocks = NULL;
+dword_t *user_memory_block_bitmap = NULL;
+
+/*
+ * Reserves the first unused slot of the address space's block array.
+ * The slot's bit is set in the block bitmap and the slot itself is zeroed.
+ * Returns NULL when all MEMORY_MAX_BLOCKS slots are already taken.
+ */
+static memory_block_t *block_create(address_space_t *space)
+{
+    dword_t index = 0;
+
+    /* Advance past every slot whose bitmap bit is already set. */
+    while (index < MEMORY_MAX_BLOCKS && test_bit(space->block_bitmap, index)) index++;
+    if (index == MEMORY_MAX_BLOCKS) return NULL;
+
+    set_bit(space->block_bitmap, index);
+
+    memory_block_t *slot = &space->blocks[index];
+    memset(slot, 0, sizeof(*slot));
+    return slot;
+}
+
+/*
+ * Marks a block slot as unused by clearing its bit in the block bitmap.
+ * The bit index is the block's position within space->blocks; the slot's
+ * contents are left untouched.
+ */
+static void block_free(address_space_t *space, memory_block_t *block)
+{
+    clear_bit(space->block_bitmap, block - space->blocks);
+}
+
+/*
+ * Finds the block of `space` that contains `address`, using the by-address
+ * tree while holding the space lock in shared mode.
+ * The lookup asserts that a node was found and that `address` lies inside
+ * the block it belongs to.
+ */
+static memory_block_t *block_find(address_space_t *space, uintptr_t address)
+{
+    memory_block_t *found;
+
+    lock_acquire_shared(&space->lock);
+
+    avl_node_t *candidate = avl_tree_lower_bound(&space->by_addr_tree, &address);
+    found = CONTAINER_OF(candidate, memory_block_t, by_addr_node);
+
+    /* The node check comes first so the block is only inspected when it exists. */
+    ASSERT(candidate && address - found->address < found->size);
+
+    lock_release(&space->lock);
+    return found;
+}
+
+/*
+ * Searches the by-size subtree rooted at `root` for the block with the
+ * smallest size that is still at least `min_size`.
+ * Returns NULL when the subtree is empty or holds no block that large.
+ * NOTE(review): the block's free flag is not inspected here -- confirm that
+ * only free blocks can be reached through this tree.
+ */
+static memory_block_t *find_smallest_free_block(avl_node_t *root, size_t min_size)
+{
+    if (root == NULL) return NULL;
+
+    memory_block_t *best = CONTAINER_OF(root, memory_block_t, by_size_node);
+
+    /* Too small: only the right subtree is searched from here. */
+    if (best->size < min_size) return find_smallest_free_block(root->right, min_size);
+
+    avl_node_t *children[2];
+    children[0] = root->left;
+    children[1] = root->right;
+
+    for (int side = 0; side < 2; side++)
+    {
+        memory_block_t *candidate = find_smallest_free_block(children[side], min_size);
+        if (candidate != NULL && candidate->size < best->size) best = candidate;
+    }
+
+    return best;
+}
+
+/*
+ * Searches the subtree rooted at `root` for the block that contains
+ * `address`.
+ * When the containing block is found it is returned only if at least
+ * `min_size` units remain between `address` and the end of the block;
+ * otherwise NULL is returned. The left subtree is preferred over the right.
+ */
+static memory_block_t *find_preferred_free_block(avl_node_t *root, uintptr_t address, size_t min_size)
+{
+    if (root == NULL) return NULL;
+
+    memory_block_t *current = CONTAINER_OF(root, memory_block_t, by_size_node);
+
+    if (address >= current->address)
+    {
+        uintptr_t offset = address - current->address;
+
+        if (offset < current->size)
+        {
+            /* This is the containing block; it either fits or nothing does. */
+            if (current->size - offset < min_size) return NULL;
+            return current;
+        }
+    }
+
+    memory_block_t *found = find_preferred_free_block(root->left, address, min_size);
+    if (found != NULL) return found;
+
+    return find_preferred_free_block(root->right, address, min_size);
+}
+
+/*
+ * Three-way comparison of two size_t keys given by pointer.
+ * Returns -1, 0 or 1 when the first key is respectively less than, equal to
+ * or greater than the second.
+ */
+static int compare(const void *a, const void *b)
+{
+    const size_t *lhs = a;
+    const size_t *rhs = b;
+
+    return (*lhs > *rhs) - (*lhs < *rhs);
+}
+
+/*
+ * Looks up the memory block that contains the given virtual address.
+ *
+ * Addresses whose value is negative when read as a signed integer are looked
+ * up in memory_upper_space, all others in memory_lower_space. The lookup key
+ * is the page number of the address.
+ *
+ * Returns NULL when the selected address space does not exist yet
+ * (memory_lower_space starts out as NULL); otherwise returns the block found
+ * by block_find().
+ */
+memory_block_t *memory_get_block_for_address(void *address)
+{
+    address_space_t *space = (intptr_t)address < 0 ? memory_upper_space : memory_lower_space;
+
+    /* block_find() takes the space lock, so it must never see a NULL space. */
+    if (!space) return NULL;
+
+    return block_find(space, PAGE_NUMBER((uintptr_t)address));
+}
+
+/*
+ * Reserves a range of pages in an address space.
+ *
+ * space          - address space to allocate from; ERR_NOTFOUND if NULL.
+ * address        - in: preferred address, or NULL to let the allocator pick
+ *                  the smallest block that fits; out: start of the range.
+ * size           - size in bytes; converted to the number of pages spanned
+ *                  by [*address, *address + size). A resulting page count of
+ *                  zero returns ERR_SUCCESS without allocating.
+ * flags          - not applied to the block by this function.
+ * section        - section recorded in the allocated block.
+ * section_offset - section offset recorded in the allocated block.
+ *
+ * The chosen free block is split so that the allocated block starts at the
+ * requested page and spans exactly the requested number of pages; the
+ * leftover parts stay in the trees as free blocks.
+ *
+ * Returns ERR_SUCCESS, or ERR_NOMEMORY when no suitable free block exists or
+ * no block descriptor could be created for a split. In the latter case the
+ * trees are left untouched.
+ */
+sysret_t memory_allocate(address_space_t *space,
+                         void **address,
+                         size_t size,
+                         memory_flags_t flags,
+                         memory_section_t *section,
+                         page_num_t section_offset)
+{
+    if (!space) return ERR_NOTFOUND;
+    sysret_t ret = ERR_NOMEMORY;
+    uintptr_t preferred_address = PAGE_NUMBER((uintptr_t)*address);
+
+    size = PAGE_NUMBER((uintptr_t)*address + size - 1) - preferred_address + 1;
+    if (!size) return ERR_SUCCESS;
+
+    lock_acquire_smart(&space->lock);
+
+    memory_block_t *block;
+    memory_block_t *aligned_block = NULL;   /* block starting at the preferred page */
+    memory_block_t *remainder_block = NULL; /* free space left after the allocation */
+    size_t offset = 0;
+    size_t available = 0;
+
+    if (*address) block = find_preferred_free_block(space->by_size_tree.root, preferred_address, size);
+    else block = find_smallest_free_block(space->by_size_tree.root, size);
+    if (!block) goto cleanup;
+
+    if (*address) offset = preferred_address - block->address;
+    available = block->size - offset;
+
+    /*
+     * Obtain every descriptor needed for the splits before touching the
+     * trees, so that running out of descriptors cannot leave them half
+     * updated. No leading split is needed when the preferred address is
+     * already the start of the block.
+     */
+    if (offset > 0)
+    {
+        aligned_block = block_create(space);
+        if (!aligned_block) goto cleanup;
+    }
+
+    if (size < available)
+    {
+        remainder_block = block_create(space);
+
+        if (!remainder_block)
+        {
+            if (aligned_block) block_free(space, aligned_block);
+            goto cleanup;
+        }
+    }
+
+    if (aligned_block)
+    {
+        /* Cut off the part of the block that precedes the preferred page. */
+        aligned_block->flags = MEMORY_BLOCK_FREE;
+        aligned_block->address = preferred_address;
+        aligned_block->size = available;
+
+        avl_tree_change_key(&space->by_size_tree, &block->by_size_node, &offset);
+        avl_tree_insert(&space->by_addr_tree, &aligned_block->by_addr_node);
+        avl_tree_insert(&space->by_size_tree, &aligned_block->by_size_node);
+
+        block = aligned_block;
+    }
+
+    if (remainder_block)
+    {
+        /* Cut off the part of the block that follows the requested range. */
+        remainder_block->flags = MEMORY_BLOCK_FREE;
+        remainder_block->address = block->address + size;
+        remainder_block->size = block->size - size;
+
+        avl_tree_change_key(&space->by_size_tree, &block->by_size_node, &size);
+        avl_tree_insert(&space->by_addr_tree, &remainder_block->by_addr_node);
+        avl_tree_insert(&space->by_size_tree, &remainder_block->by_size_node);
+    }
+
+    block->flags &= ~MEMORY_BLOCK_FREE;
+    block->section = section;
+    block->section_offset = section_offset;
+    *address = (void*)(block->address * PAGE_SIZE);
+    ret = ERR_SUCCESS;
+
+cleanup:
+    lock_release(&space->lock);
+    return ret;
+}
+
+/*
+ * Releases the block of `space` that contains `address` and merges it with
+ * any free neighbours in address order.
+ *
+ * Returns ERR_NOTFOUND when `space` is NULL, ERR_INVALID when no block is
+ * found or the block is already free, and ERR_SUCCESS otherwise.
+ */
+sysret_t memory_free(address_space_t *space, void *address)
+{
+    if (!space) return ERR_NOTFOUND;
+
+    sysret_t ret = ERR_INVALID;
+    lock_acquire_smart(&space->lock);
+
+    memory_block_t *target = block_find(space, PAGE_NUMBER((uintptr_t)address));
+
+    if (target && !(target->flags & MEMORY_BLOCK_FREE))
+    {
+        avl_node_t *neighbor;
+
+        target->flags = MEMORY_BLOCK_FREE;
+        target->section = NULL;
+        target->section_offset = 0;
+
+        /* Absorb every free block that directly follows the target. */
+        while ((neighbor = avl_get_next_node(&target->by_addr_node)) != NULL)
+        {
+            memory_block_t *following = CONTAINER_OF(neighbor, memory_block_t, by_addr_node);
+            if (!(following->flags & MEMORY_BLOCK_FREE)) break;
+
+            size_t merged_size = target->size + following->size;
+            avl_tree_change_key(&space->by_size_tree, &target->by_size_node, &merged_size);
+
+            avl_tree_remove(&space->by_addr_tree, &following->by_addr_node);
+            avl_tree_remove(&space->by_size_tree, &following->by_size_node);
+            block_free(space, following);
+        }
+
+        /* Let every free block that directly precedes the target absorb it. */
+        while ((neighbor = avl_get_previous_node(&target->by_addr_node)) != NULL)
+        {
+            memory_block_t *preceding = CONTAINER_OF(neighbor, memory_block_t, by_addr_node);
+            if (!(preceding->flags & MEMORY_BLOCK_FREE)) break;
+
+            size_t merged_size = preceding->size + target->size;
+            avl_tree_change_key(&space->by_size_tree, &preceding->by_size_node, &merged_size);
+
+            avl_tree_remove(&space->by_addr_tree, &target->by_addr_node);
+            avl_tree_remove(&space->by_size_tree, &target->by_size_node);
+            block_free(space, target);
+            target = preceding;
+        }
+
+        ret = ERR_SUCCESS;
+    }
+
+    lock_release(&space->lock);
+    return ret;
+}
+
+/* Placeholder: ignores its arguments and always returns ERR_NOSYSCALL. */
+sysret_t new_syscall_alloc_memory(handle_t process, void **address, size_t size, memory_flags_t flags)
+{
+    return ERR_NOSYSCALL;
+}
+
+/* Placeholder: ignores its arguments and always returns ERR_NOSYSCALL. */
+sysret_t new_syscall_free_memory(handle_t process, void *address)
+{
+    return ERR_NOSYSCALL;
+}
+
+/* Placeholder: ignores its arguments and always returns ERR_NOSYSCALL. */
+sysret_t new_syscall_uncommit_memory(handle_t process, void *address, size_t size)
+{
+    return ERR_NOSYSCALL;
+}
+
+/* Placeholder: ignores its arguments and always returns ERR_NOSYSCALL. */
+sysret_t new_syscall_query_memory(handle_t process, void *address, memory_block_info_t *info)
+{
+    return ERR_NOSYSCALL;
+}
+
+/* Placeholder: ignores its arguments and always returns ERR_NOSYSCALL. */
+sysret_t new_syscall_protect_memory(handle_t process, void *address, size_t size, memory_flags_t flags)
+{
+    return ERR_NOSYSCALL;
+}
+
+/* Placeholder: ignores its arguments and always returns ERR_NOSYSCALL. */
+sysret_t new_syscall_read_memory(handle_t process, void *address, void *buffer, dword_t size)
+{
+    return ERR_NOSYSCALL;
+}
+
+/* Placeholder: ignores its arguments and always returns ERR_NOSYSCALL. */
+sysret_t new_syscall_write_memory(handle_t process, void *address, void *buffer, dword_t size)
+{
+    return ERR_NOSYSCALL;
+}
+
+void memory_init_virtual(const area_t *kernel_area)
+{   /* Sets up kernel_space (block array, block bitmap, the two AVL indexes,
+     * lock and root page table), records its initial regions through
+     * memory_allocate(), maps kernel_area into the new table and finally
+     * loads that table with memory_load_default_table().
+     *
+     * kernel_area: area passed to memory_map_area() for the region starting
+     *              at (uintptr_t)INTPTR_MAX + 1; its `count` field sizes
+     *              that region. Presumably the kernel image's own pages --
+     *              confirm against the caller.
+     *
+     * Returns nothing. Failures are reported through ASSERT or
+     * KERNEL_CRASH. NOTE(review): the two mechanisms are mixed; if ASSERT
+     * is compiled out in some builds, those checks vanish -- confirm. */
+    kernel_space.blocks = memory_request_metadata_space(SPACE_BLOCK_PAGES, PAGE_SIZE);
+    kernel_space.block_bitmap = memory_request_metadata_space(SPACE_BITMAP_PAGES, PAGE_SIZE);
+    uintptr_t global_metadata = memory_metadata_base;
+    /* global_metadata is memory_metadata_base as it stands after the two
+     * kernel_space requests and before the two user_* requests below. It is
+     * used later as the boundary between the two metadata memory_allocate()
+     * calls. NOTE(review): the size expression there only makes sense if
+     * memory_request_metadata_space() moves memory_metadata_base downward --
+     * confirm. */
+    user_memory_blocks = memory_request_metadata_space(SPACE_BLOCK_PAGES, PAGE_SIZE);
+    user_memory_block_bitmap = memory_request_metadata_space(SPACE_BITMAP_PAGES, PAGE_SIZE);
+    /* Index kernel_space's blocks twice: keyed by `address` and by `size`. */
+    AVL_TREE_INIT(&kernel_space.by_addr_tree, memory_block_t, by_addr_node, address, compare);
+    AVL_TREE_INIT(&kernel_space.by_size_tree, memory_block_t, by_size_node, size, compare);
+    lock_init(&kernel_space.lock);
+    /* Acquire two pages; they are mapped below at kernel_space.blocks and
+     * kernel_space.block_bitmap respectively. */
+    page_t *initial_block_page = memory_acquire_page(MIN_PHYS_ADDR_BITS, MAX_PHYS_ADDR_BITS, PAGE_SIZE);
+    ASSERT(initial_block_page != NULL);
+
+    page_t *initial_bitmap_page = memory_acquire_page(MIN_PHYS_ADDR_BITS, MAX_PHYS_ADDR_BITS, PAGE_SIZE);
+    ASSERT(initial_bitmap_page != NULL);
+    /* Map both pages into memory_default_table as accessible, writable and
+     * sticky; a failed mapping crashes the kernel. */
+    sysret_t ret = memory_map_page(memory_default_table,
+                                   initial_block_page,
+                                   kernel_space.blocks,
+                                   MEMORY_FLAG_ACCESSIBLE | MEMORY_FLAG_WRITABLE | MEMORY_FLAG_STICKY);
+    if (ret != ERR_SUCCESS) KERNEL_CRASH("Memory block mapping failed");
+
+    ret = memory_map_page(memory_default_table,
+                          initial_bitmap_page,
+                          kernel_space.block_bitmap,
+                          MEMORY_FLAG_ACCESSIBLE | MEMORY_FLAG_WRITABLE | MEMORY_FLAG_STICKY);
+    if (ret != ERR_SUCCESS) KERNEL_CRASH("Memory block bitmap mapping failed");
+    /* Seed both trees with a single free block. Its address and size are
+     * stored as page numbers: it starts at
+     * PAGE_NUMBER((uintptr_t)INTPTR_MAX + 1) and ends at
+     * PAGE_NUMBER(MEMORY_METADATA_TOP).
+     * NOTE(review): root_block is dereferenced without a NULL check --
+     * confirm block_create() cannot fail at this point. */
+    memory_block_t *root_block = block_create(&kernel_space);
+    root_block->address = PAGE_NUMBER((uintptr_t)INTPTR_MAX + 1);
+    root_block->size = PAGE_NUMBER(MEMORY_METADATA_TOP) - root_block->address;
+    root_block->flags = MEMORY_FLAG_FREE;
+    avl_tree_insert(&kernel_space.by_addr_tree, &root_block->by_addr_node);
+    avl_tree_insert(&kernel_space.by_size_tree, &root_block->by_size_node);
+    /* Create the root page table for kernel_space; failure is fatal. */
+    if (!(kernel_space.root_page_table = memory_create_page_table()))
+        KERNEL_CRASH("Cannot create kernel page directory");
+    /* Record the range from memory_metadata_base up to global_metadata as
+     * accessible and writable, without MEMORY_FLAG_STICKY. The size passed
+     * is a difference of page numbers. */
+    void *address = (void*)memory_metadata_base;
+    ret = memory_allocate(memory_upper_space,
+                          &address,
+                          PAGE_NUMBER(global_metadata) - PAGE_NUMBER(memory_metadata_base),
+                          MEMORY_FLAG_ACCESSIBLE | MEMORY_FLAG_WRITABLE,
+                          NULL,
+                          0);
+    ASSERT(ret == ERR_SUCCESS);
+    /* Record the range from global_metadata up to MEMORY_METADATA_TOP, this
+     * time with MEMORY_FLAG_STICKY added. */
+    address = (void*)global_metadata;
+    ret = memory_allocate(memory_upper_space,
+                          &address,
+                          PAGE_NUMBER(MEMORY_METADATA_TOP) - PAGE_NUMBER(global_metadata),
+                          MEMORY_FLAG_ACCESSIBLE | MEMORY_FLAG_WRITABLE | MEMORY_FLAG_STICKY,
+                          NULL,
+                          0);
+    ASSERT(ret == ERR_SUCCESS);
+    /* Load the new root table as the shadow table; the memory_map_area()
+     * call below targets memory_shadow_table. Failure is fatal. */
+    ret = memory_load_shadow_table(kernel_space.root_page_table);
+    if (ret != ERR_SUCCESS) KERNEL_CRASH("Cannot mount the kernel space");
+    /* Record the region starting at (uintptr_t)INTPTR_MAX + 1 as
+     * accessible, writable, executable and sticky.
+     * NOTE(review): the size here is kernel_area->count * PAGE_SIZE, while
+     * the two memory_allocate() calls above pass page-number differences --
+     * verify which unit memory_allocate() expects. */
+    address = (void*)((uintptr_t)INTPTR_MAX + 1);
+    ret = memory_allocate(memory_upper_space,
+                          &address,
+                          kernel_area->count * PAGE_SIZE,
+                          MEMORY_FLAG_ACCESSIBLE
+                          | MEMORY_FLAG_WRITABLE
+                          | MEMORY_FLAG_EXECUTABLE
+                          | MEMORY_FLAG_STICKY,
+                          NULL,
+                          0);
+    ASSERT(ret == ERR_SUCCESS);
+    /* Map kernel_area through the shadow table at `address` (passed by
+     * pointer to memory_allocate() above, so it may have been updated),
+     * using the same flag set. */
+    ret = memory_map_area(memory_shadow_table,
+                          kernel_area,
+                          address,
+                          MEMORY_FLAG_ACCESSIBLE
+                          | MEMORY_FLAG_WRITABLE
+                          | MEMORY_FLAG_EXECUTABLE
+                          | MEMORY_FLAG_STICKY);
+    ASSERT(ret == ERR_SUCCESS);
+    /* Release the shadow table, then load the new root table through
+     * memory_load_default_table(). */
+    memory_unload_shadow_table();
+    memory_load_default_table(kernel_space.root_page_table);
+}