zephyr/arch/x86/gen_mmu.py

549 lines
18 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Create the kernel's page tables for x86 CPUs.
For additional detail on paging and x86 memory management, please
consult the IA Architecture SW Developer Manual, volume 3a, chapter 4.
This script produces the initial page tables installed into the CPU
at early boot. These pages will have an identity mapping of the kernel
image. The script takes the 'zephyr_prebuilt.elf' as input to obtain region
sizes, certain memory addresses, and configuration values.
If CONFIG_SRAM_REGION_PERMISSIONS is not enabled, the kernel image will be
mapped with the Present and Write bits set. The linker scripts shouldn't
add page alignment padding between sections.
If CONFIG_SRAM_REGION_PERMISSIONS is enabled, the access permissions
vary:
- By default, the Present, Write, and Execute Disable bits are
set.
- The _image_text region will have Present and User bits set
- The _image_rodata region will have Present, User, and Execute
Disable bits set
- On x86_64, the _locore region will have Present set and
the _lorodata region will have Present and Execute Disable set.
Because the set of page tables are linked together by physical address,
we must know a priori the physical address of each table. The linker
script must define a z_x86_pagetables_start symbol where the page
tables will be placed, and this memory address must not shift between
prebuilt and final ELF builds. This script will not work on systems
where the physical load address of the kernel is unknown at build time.
64-bit systems will always build IA-32e page tables. 32-bit systems
build PAE page tables if CONFIG_X86_PAE is set, otherwise standard
32-bit page tables are built.
The kernel will expect to find the top-level structure of the produced
page tables at the physical address corresponding to the symbol
z_x86_kernel_ptables. The linker script will need to set that symbol
to the end of the binary produced by this script, minus the size of the
top-level paging structure as it is written out last.
"""
import sys
import array
import argparse
import os
import struct
import elftools
from distutils.version import LooseVersion
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
# Refuse to run against a pyelftools release older than the minimum this
# script supports.
# NOTE(review): distutils is deprecated by PEP 632 and removed in Python
# 3.12, so this comparison will eventually need a different version parser.
if LooseVersion('0.24') > LooseVersion(elftools.__version__):
    sys.exit("pyelftools is out of date, need version 0.24 or later")
def bit(pos):
    """Return an integer in which only bit number ``pos`` is set, bit 0
    being the least significant one"""
    mask = 1
    mask <<= pos
    return mask
# Page table entry flag bits, listed by bit position. The short names are
# the labels dump_flags() prints; the module docstring refers to P, RW, US
# and XD as the Present, Write, User and Execute Disable bits.
FLAG_P = bit(0)
FLAG_RW = bit(1)
FLAG_US = bit(2)
FLAG_G = bit(8)
# presumably bits the MMU itself ignores, leaving them free for software
# use -- confirm against the IA manual
FLAG_IGNORED0 = bit(9)
FLAG_IGNORED1 = bit(10)
FLAG_IGNORED2 = bit(11)
FLAG_XD = bit(63)

# Each ENTRY_* value pairs one permission flag with one of the ignored bits,
# so granting a permission through ENTRY_* also sets its companion bit.
ENTRY_RW = FLAG_IGNORED0 | FLAG_RW
ENTRY_US = FLAG_IGNORED1 | FLAG_US
ENTRY_XD = FLAG_IGNORED2 | FLAG_XD
def debug(text):
    """Write ``text`` to stdout, prefixed with the script name, when verbose
    output is enabled in the module-level ``args``; otherwise do nothing"""
    if args.verbose:
        prog = os.path.basename(sys.argv[0])
        sys.stdout.write(prog + ": " + text + "\n")
def error(text):
    """Terminate the script through ``sys.exit``, using ``text`` prefixed
    with the script name as the exit message"""
    prog = os.path.basename(sys.argv[0])
    message = prog + ": " + text
    sys.exit(message)
def align_check(base, size):
    """Abort through error() unless ``base`` and ``size`` are both multiples
    of 4096, the page granularity used throughout this script. The base
    address is checked before the size."""
    if base % 4096:
        error("unaligned base address %x" % base)
    if size % 4096:
        error("Unaligned region size %d for base %x" % (size, base))
def dump_flags(flags):
    """Return a space-separated string naming the flag bits set in ``flags``.

    Only P, RW, US, G and XD are reported, always in that order; every other
    bit is left out. The result is an empty string when none of them is set.
    """
    labelled = ((FLAG_P, "P"), (FLAG_RW, "RW"), (FLAG_US, "US"),
                (FLAG_G, "G"), (FLAG_XD, "XD"))
    return " ".join(label for mask, label in labelled if flags & mask)
# Flags given to every entry that points at a lower-level table. They are
# deliberately permissive: access control and caching properties are only
# applied in the leaf entries.
INT_FLAGS = FLAG_US | FLAG_RW | FLAG_P
class MMUTable:
    """One table, at any level, of a set of page tables.

    Concrete subclasses provide ``supported_flags``, ``addr_shift``,
    ``addr_mask``, ``type_code`` and ``num_entries`` as class attributes.
    The properties defined here only raise NotImplementedError for a
    subclass that leaves one of them out.
    """

    def __init__(self):
        # Every entry starts out as zero
        self.entries = array.array(self.type_code, [0] * self.num_entries)

    def get_binary(self):
        """Return the table as a bytearray, entries in index order"""
        # Entries are always written little-endian, whatever the host is
        layout = "<%d%s" % (self.num_entries, self.type_code)
        return bytearray(struct.pack(layout, *self.entries))

    @property
    def supported_flags(self):
        """Class property: mask of the flag bits this table type accepts"""
        raise NotImplementedError()

    @property
    def addr_shift(self):
        """Class property: how far a virtual address is shifted right to
        obtain its index in this table"""
        raise NotImplementedError()

    @property
    def addr_mask(self):
        """Class property: mask that extracts the physical address from an
        individual entry"""
        raise NotImplementedError()

    @property
    def type_code(self):
        """Class property: struct packing letter for the entries, I for
        32-bit entries or Q for PAE/IA-32e"""
        raise NotImplementedError()

    @property
    def num_entries(self):
        """Class property: number of entries in the table, which depends on
        the table type and paging mode"""
        raise NotImplementedError()

    def entry_index(self, virt_addr):
        """Return the index of the entry in this table that covers the
        provided virtual address"""
        shifted = virt_addr >> self.addr_shift
        return shifted & (self.num_entries - 1)

    def has_entry(self, virt_addr):
        """Return True when the entry covering the provided virtual address
        has its Present flag set"""
        entry = self.entries[self.entry_index(virt_addr)]
        return bool(entry & FLAG_P)

    def lookup(self, virt_addr):
        """Return the physical address stored for a virtual address.

        In a leaf table this is the physical address mapping; in any other
        table it is the physical address of the next level table."""
        entry = self.entries[self.entry_index(virt_addr)]
        return entry & self.addr_mask

    def map(self, virt_addr, phys_addr, entry_flags):
        """Overwrite the entry covering the provided virtual address with
        the given physical address and flags. Unsupported flags are dropped.

        In a leaf table the address is the physical address mapping; in any
        other table it is the physical address of the next level table."""
        address_bits = phys_addr & self.addr_mask
        flag_bits = entry_flags & self.supported_flags
        self.entries[self.entry_index(virt_addr)] = address_bits | flag_bits

    def set_perms(self, virt_addr, entry_flags):
        """Replace only the flags of the entry covering the provided virtual
        address, keeping its physical address. Unsupported flags are
        dropped."""
        slot = self.entry_index(virt_addr)
        address_bits = self.entries[slot] & self.addr_mask
        flag_bits = entry_flags & self.supported_flags
        self.entries[slot] = address_bits | flag_bits
# Specific supported table types
class Pml4(MMUTable):
    """IA-32e page mapping level 4 table: 512 'Q' entries, indexed by the
    virtual address bits from bit 39 upward"""
    type_code = 'Q'
    num_entries = 512
    addr_shift = 39
    addr_mask = 0x7FFFFFFFFFFFF000
    # Not a leaf level, so only the intermediate-level flags are accepted
    supported_flags = INT_FLAGS
class Pdpt(MMUTable):
    """IA-32e page directory pointer table: 512 'Q' entries, indexed by the
    virtual address bits from bit 30 upward"""
    type_code = 'Q'
    num_entries = 512
    addr_shift = 30
    addr_mask = 0x7FFFFFFFFFFFF000
    # Not a leaf level, so only the intermediate-level flags are accepted
    supported_flags = INT_FLAGS
class PdptPAE(Pdpt):
    """Page directory pointer table for PAE"""
    # Same layout as the IA-32e Pdpt it inherits from, but with only four
    # entries, so this table is smaller than a full page.
    num_entries = 4
class Pd(MMUTable):
    """32-bit page directory: 1024 'I' entries, indexed by the virtual
    address bits from bit 22 upward"""
    type_code = 'I'
    num_entries = 1024
    addr_shift = 22
    addr_mask = 0xFFFFF000
    # Not a leaf level, so only the intermediate-level flags are accepted
    supported_flags = INT_FLAGS
class PdXd(Pd):
    """Page directory used by both PAE and IA-32e: 512 'Q' entries, indexed
    by the virtual address bits from bit 21 upward. The supported flags are
    inherited unchanged from Pd."""
    type_code = 'Q'
    num_entries = 512
    addr_shift = 21
    addr_mask = 0x7FFFFFFFFFFFF000
class Pt(MMUTable):
    """32-bit leaf page table: 1024 'I' entries, indexed by the virtual
    address bits from bit 12 upward"""
    type_code = 'I'
    num_entries = 1024
    addr_shift = 12
    addr_mask = 0xFFFFF000
    # Leaf entries accept the access flags plus two of the ignored bits.
    # FLAG_XD and FLAG_IGNORED2 are not in the set, so map() and set_perms()
    # filter them out for this table type.
    supported_flags = (FLAG_IGNORED0 | FLAG_IGNORED1 |
                       FLAG_P | FLAG_RW | FLAG_US | FLAG_G)
class PtXd(Pt):
    """Leaf page table used by both PAE and IA-32e: 512 'Q' entries. The
    address shift of 12 is inherited from Pt."""
    type_code = 'Q'
    num_entries = 512
    addr_mask = 0x07FFFFFFFFFFF000
    # Unlike Pt, these entries also accept FLAG_XD and its companion
    # FLAG_IGNORED2.
    supported_flags = (FLAG_IGNORED0 | FLAG_IGNORED1 | FLAG_IGNORED2 |
                       FLAG_P | FLAG_RW | FLAG_US | FLAG_G | FLAG_XD)
class PtableSet:
    """A complete set of page tables for one paging mode.

    Subclasses supply ``levels``, the list of table classes running from the
    top-level table down to the leaf page table.
    """

    def __init__(self, pages_start):
        """Create an empty set of page tables whose pages will sit in the
        image starting at physical address ``pages_start``"""
        # Physical address the next newly created table will receive
        self.page_pos = pages_start
        self.toplevel = self.levels[0]()

        debug("%s starting at physical address 0x%x" %
              (self.__class__.__name__, pages_start))

        # Every table except the top-level one, keyed by the physical
        # address it will live at. Nothing has been mapped yet, so this
        # starts empty; the top-level table is kept in self.toplevel.
        self.tables = {}

    def get_new_mmutable_addr(self):
        """Return the physical address for the next new MMUTable and move
        the allocation position forward by 4096 bytes"""
        addr = self.page_pos
        self.page_pos = addr + 4096
        return addr

    @property
    def levels(self):
        """Class property: the paging level classes in order, the first
        being the top-level table class and the last always some kind of
        leaf page table class (Pt or PtXd)"""
        raise NotImplementedError()

    def new_child_table(self, table, virt_addr, depth):
        """Create a table of the class at ``levels[depth]``, record it under
        a freshly allocated physical address, and point the ``virt_addr``
        entry of ``table`` at it. Returns the new table."""
        child_addr = self.get_new_mmutable_addr()
        child_cls = self.levels[depth]
        child = child_cls()

        debug("new %s at physical addr 0x%x"
              % (child_cls.__name__, child_addr))
        self.tables[child_addr] = child
        table.map(virt_addr, child_addr, INT_FLAGS)

        return child

    def map_page(self, virt_addr, phys_addr, flags, reserve):
        """Map one virtual address to a physical address with the provided
        access flags, creating any missing intermediate tables on the way.

        When ``reserve`` is true only the tables are created; the leaf entry
        itself is left untouched."""
        table = self.toplevel

        # Descend one level per iteration, reusing the child table when the
        # entry is already present and creating it otherwise
        for depth in range(1, len(self.levels)):
            if table.has_entry(virt_addr):
                table = self.tables[table.lookup(virt_addr)]
            else:
                table = self.new_child_table(table, virt_addr, depth)

        if reserve:
            return

        # Fill in the entry in the leaf page table
        table.map(virt_addr, phys_addr, flags)

    def reserve(self, virt_base, size):
        """Create all paging structures, down to the leaf tables, for the
        virtual range starting at ``virt_base`` without mapping any page.

        ``virt_base`` must be a multiple of the amount of memory covered by
        one leaf page table."""
        debug("Reserving paging structures 0x%x (%d)" %
              (virt_base, size))

        align_check(virt_base, size)

        # Amount of address space a single leaf page table covers, which is
        # what one entry of the level above it spans
        leaf_span = 1 << self.levels[-2].addr_shift

        if virt_base % leaf_span != 0:
            error("misaligned virtual address space, 0x%x not a multiple of 0x%x" %
                  (virt_base, leaf_span))

        range_end = virt_base + size
        for addr in range(virt_base, range_end, leaf_span):
            self.map_page(addr, 0, 0, True)

    def map(self, phys_base, size, flags):
        """Identity-map the range starting at ``phys_base`` with the
        provided access flags, one 4096-byte page at a time"""
        debug("Identity-mapping 0x%x (%d): %s" %
              (phys_base, size, dump_flags(flags)))

        align_check(phys_base, size)

        range_end = phys_base + size
        for addr in range(phys_base, range_end, 4096):
            # The NULL page is never mapped
            if addr != 0:
                self.map_page(addr, addr, flags, False)

    def set_region_perms(self, name, flags):
        """Change the access flags of a named region that is already mapped.

        The region bounds come from the ``<name>_start`` and ``<name>_size``
        symbols. Only flags change; the physical address held in each entry
        is kept, so any double-mapping is undisturbed."""
        # It does not matter whether this is a virtual address: either there
        # is a dual mapping or it is the same as the physical address
        base = syms[name + "_start"]
        size = syms[name + "_size"]

        debug("change flags for %s at 0x%x (%d): %s" %
              (name, base, size, dump_flags(flags)))
        align_check(base, size)

        # Number of table hops from the top-level table to the leaf
        hops = len(self.levels) - 1

        # NOTE(review): only a missing intermediate table ends up in the
        # KeyError handler. A leaf entry that was reserved but never mapped
        # is not detected and simply receives the new flags.
        try:
            for addr in range(base, base + size, 4096):
                if addr == 0:
                    # The NULL page is never mapped
                    continue

                table = self.toplevel
                for _ in range(hops):
                    table = self.tables[table.lookup(addr)]
                table.set_perms(addr, flags)
        except KeyError:
            error("no mapping for %s region 0x%x (size 0x%x)" %
                  (name, base, size))

    def write_output(self, filename):
        """Write all page tables to ``filename`` in binary form, ordered by
        physical address with the top-level table last"""
        with open(filename, "wb") as fp:
            for addr in sorted(self.tables):
                fp.write(self.tables[addr].get_binary())

            # The top-level table always goes last. In PAE the top-level
            # PDPT has only 4 entries and is not a full page in size, so it
            # is kept out of the tables dictionary and handled separately.
            toplevel_name = self.toplevel.__class__.__name__
            toplevel_addr = self.get_new_mmutable_addr()
            debug("top-level %s at physical addr 0x%x" %
                  (toplevel_name, toplevel_addr))
            fp.write(self.toplevel.get_binary())
# Paging mode classes, we'll use one depending on configuration
class Ptables32bit(PtableSet):
    """Standard 32-bit paging: a page directory over leaf page tables"""
    levels = [Pd, Pt]
class PtablesPAE(PtableSet):
    """PAE paging: a 4-entry PDPT, then page directories, then leaf page
    tables"""
    levels = [PdptPAE, PdXd, PtXd]
class PtablesIA32e(PtableSet):
    """IA-32e paging: PML4, PDPT, page directories, then leaf page tables"""
    levels = [Pml4, Pdpt, PdXd, PtXd]
def parse_args():
    """Parse the command line into the module-level ``args`` namespace.

    Verbose output is also switched on whenever a VERBOSE variable exists in
    the environment, whatever its value."""
    global args

    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    options = (
        ("-k", "--kernel", {"required": True,
                            "help": "path to prebuilt kernel ELF binary"}),
        ("-o", "--output", {"required": True,
                            "help": "output file"}),
        ("-v", "--verbose", {"action": "store_true",
                             "help": "Print extra debugging information"}),
    )
    for short_name, long_name, settings in options:
        cli.add_argument(short_name, long_name, **settings)

    args = cli.parse_args()
    args.verbose = args.verbose or "VERBOSE" in os.environ
def get_symbols(obj):
    """Return a dict mapping each symbol name to its ``st_value``, taken
    from the first symbol table section found in the ELF object ``obj``.

    Raises LookupError when ``obj`` has no symbol table section."""
    symtab = next((section for section in obj.iter_sections()
                   if isinstance(section, SymbolTableSection)), None)
    if symtab is None:
        raise LookupError("Could not find symbol table")

    return {sym.name: sym.entry.st_value for sym in symtab.iter_symbols()}
def isdef(sym_name):
    """Return True when ``sym_name`` is present in the module-level ``syms``
    symbol dictionary"""
    present = sym_name in syms
    return present
def main():
    """Build the page tables described by the prebuilt kernel ELF and write
    them to the requested output file.

    The paging mode, the address ranges to map and the permission policy are
    all read from symbols in the ELF, which end up in the module-level
    ``syms`` dictionary."""
    global syms
    parse_args()

    with open(args.kernel, "rb") as elf_file:
        syms = get_symbols(ELFFile(elf_file))

    # Select the paging mode from the kernel configuration
    if isdef("CONFIG_X86_64"):
        pclass = PtablesIA32e
    elif isdef("CONFIG_X86_PAE"):
        pclass = PtablesPAE
    else:
        pclass = Ptables32bit

    debug("building %s" % pclass.__name__)

    vm_base = syms["CONFIG_KERNEL_VM_BASE"]
    vm_size = syms["CONFIG_KERNEL_VM_SIZE"]

    if isdef("CONFIG_ARCH_MAPS_ALL_RAM"):
        # Map all of SRAM; its size is multiplied by 1024 to get bytes
        image_base = syms["CONFIG_SRAM_BASE_ADDRESS"]
        image_size = syms["CONFIG_SRAM_SIZE"] * 1024
    else:
        image_base = syms["z_mapped_start"]
        image_size = syms["z_mapped_size"]

    ptables_phys = syms["z_x86_pagetables_start"]

    debug("Address space: 0x%x - 0x%x size %x" %
          (vm_base, vm_base + vm_size, vm_size))

    debug("Zephyr image:  0x%x - 0x%x size %x" %
          (image_base, image_base + image_size, image_size))

    is_perm_regions = isdef("CONFIG_SRAM_REGION_PERMISSIONS")

    if image_size >= vm_size:
        error("VM size is too small (have 0x%x need more than 0x%x)" % (vm_size, image_size))

    map_flags = FLAG_P
    if is_perm_regions:
        # No page is executable by default; the later calls to
        # pt.set_region_perms() lift this where it is needed
        map_flags |= ENTRY_XD

    pt = pclass(ptables_phys)

    # Create every paging structure for the address space up front
    pt.reserve(vm_base, vm_size)

    # Map the zephyr image
    pt.map(image_base, image_size, map_flags | ENTRY_RW)

    if isdef("CONFIG_X86_64"):
        # 64-bit has a special region in the first 64K to bootstrap other
        # CPUs from real mode
        locore_base = syms["_locore_start"]
        locore_size = syms["_lodata_end"] - locore_base
        debug("Base addresses: physical 0x%x size %d" % (locore_base,
                                                         locore_size))
        pt.map(locore_base, locore_size, map_flags | ENTRY_RW)

    if isdef("CONFIG_XIP"):
        # Additionally identity-map all ROM as read-only
        rom_base = syms["CONFIG_FLASH_BASE_ADDRESS"]
        rom_size = syms["CONFIG_FLASH_SIZE"] * 1024
        pt.map(rom_base, rom_size, map_flags)

    # Adjust mapped region permissions if configured
    if is_perm_regions:
        # What has to happen here:
        # - Text regions get the XD flag cleared, and the RW flag removed
        #   unless built with gdbstub support
        # - Rodata regions get the RW flag cleared
        # - User mode gets access, as application text/rodata is currently
        #   not separated from kernel text/rodata
        text_flags = FLAG_P | ENTRY_US
        if isdef("CONFIG_GDBSTUB"):
            text_flags |= ENTRY_RW
        pt.set_region_perms("_image_text", text_flags)

        pt.set_region_perms("_image_rodata", FLAG_P | ENTRY_US | ENTRY_XD)

        if isdef("CONFIG_COVERAGE_GCOV") and isdef("CONFIG_USERSPACE"):
            # With GCOV enabled, user mode must be able to write to its
            # common data area
            pt.set_region_perms("__gcov_bss",
                                FLAG_P | ENTRY_RW | ENTRY_US | ENTRY_XD)

        if isdef("CONFIG_X86_64"):
            # Give the locore areas permissions much like the main
            # text/rodata regions received above.
            #
            # With KPTI the User bit is set on the read-only
            # locore/lorodata areas so that they get mapped into the User
            # page tables. They hold no sensitive data, and contain
            # text/data needed to take an exception or interrupt.
            flag_user = ENTRY_US if isdef("CONFIG_X86_KPTI") else 0

            pt.set_region_perms("_locore", FLAG_P | flag_user)
            pt.set_region_perms("_lorodata", FLAG_P | ENTRY_XD | flag_user)

    pt.write_output(args.output)
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()