549 lines
18 KiB
Python
Executable File
549 lines
18 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# Copyright (c) 2020 Intel Corporation
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
"""Create the kernel's page tables for x86 CPUs.
|
|
|
|
For additional detail on paging and x86 memory management, please
|
|
consult the IA Architecture SW Developer Manual, volume 3a, chapter 4.
|
|
|
|
This script produces the initial page tables installed into the CPU
|
|
at early boot. These pages will have an identity mapping of the kernel
|
|
image. The script takes the 'zephyr_prebuilt.elf' as input to obtain region
|
|
sizes, certain memory addresses, and configuration values.
|
|
|
|
If CONFIG_SRAM_REGION_PERMISSIONS is not enabled, the kernel image will be
|
|
mapped with the Present and Write bits set. The linker scripts shouldn't
|
|
add page alignment padding between sections.
|
|
|
|
If CONFIG_SRAM_REGION_PERMISSIONS is enabled, the access permissions
|
|
vary:
|
|
- By default, the Present, Write, and Execute Disable bits are
|
|
set.
|
|
- The _image_text region will have Present and User bits set
|
|
- The _image_rodata region will have Present, User, and Execute
|
|
Disable bits set
|
|
- On x86_64, the _locore region will have Present set and
|
|
the _lorodata region will have Present and Execute Disable set.
|
|
|
|
Because the set of page tables are linked together by physical address,
|
|
we must know a priori the physical address of each table. The linker
|
|
script must define a z_x86_pagetables_start symbol where the page
|
|
tables will be placed, and this memory address must not shift between
|
|
prebuilt and final ELF builds. This script will not work on systems
|
|
where the physical load address of the kernel is unknown at build time.
|
|
|
|
64-bit systems will always build IA-32e page tables. 32-bit systems
|
|
build PAE page tables if CONFIG_X86_PAE is set, otherwise standard
|
|
32-bit page tables are built.
|
|
|
|
The kernel will expect to find the top-level structure of the produced
|
|
page tables at the physical address corresponding to the symbol
|
|
z_x86_kernel_ptables. The linker script will need to set that symbol
|
|
to the end of the binary produced by this script, minus the size of the
|
|
top-level paging structure as it is written out last.
|
|
"""
|
|
|
|
import sys
|
|
import array
|
|
import argparse
|
|
import os
|
|
import struct
|
|
import elftools
|
|
from distutils.version import LooseVersion
|
|
from elftools.elf.elffile import ELFFile
|
|
from elftools.elf.sections import SymbolTableSection
|
|
|
|
if LooseVersion(elftools.__version__) < LooseVersion('0.24'):
|
|
sys.exit("pyelftools is out of date, need version 0.24 or later")
|
|
|
|
|
|
def bit(pos):
|
|
return 1 << pos
|
|
|
|
|
|
# Page table entry flags
|
|
FLAG_P = bit(0)
|
|
FLAG_RW = bit(1)
|
|
FLAG_US = bit(2)
|
|
FLAG_G = bit(8)
|
|
FLAG_XD = bit(63)
|
|
|
|
FLAG_IGNORED0 = bit(9)
|
|
FLAG_IGNORED1 = bit(10)
|
|
FLAG_IGNORED2 = bit(11)
|
|
|
|
ENTRY_RW = FLAG_RW | FLAG_IGNORED0
|
|
ENTRY_US = FLAG_US | FLAG_IGNORED1
|
|
ENTRY_XD = FLAG_XD | FLAG_IGNORED2
|
|
|
|
def debug(text):
|
|
if not args.verbose:
|
|
return
|
|
sys.stdout.write(os.path.basename(sys.argv[0]) + ": " + text + "\n")
|
|
|
|
|
|
def error(text):
|
|
sys.exit(os.path.basename(sys.argv[0]) + ": " + text)
|
|
|
|
|
|
def align_check(base, size):
|
|
if (base % 4096) != 0:
|
|
error("unaligned base address %x" % base)
|
|
if (size % 4096) != 0:
|
|
error("Unaligned region size %d for base %x" % (size, base))
|
|
|
|
|
|
def dump_flags(flags):
|
|
ret = ""
|
|
|
|
if flags & FLAG_P:
|
|
ret += "P "
|
|
|
|
if flags & FLAG_RW:
|
|
ret += "RW "
|
|
|
|
if flags & FLAG_US:
|
|
ret += "US "
|
|
|
|
if flags & FLAG_G:
|
|
ret += "G "
|
|
|
|
if flags & FLAG_XD:
|
|
ret += "XD "
|
|
|
|
return ret.strip()
|
|
|
|
# Hard-coded flags for intermediate paging levels. Permissive, we only control
|
|
# access or set caching properties at leaf levels.
|
|
INT_FLAGS = FLAG_P | FLAG_RW | FLAG_US
|
|
|
|
class MMUTable(object):
|
|
"""Represents a particular table in a set of page tables, at any level"""
|
|
|
|
def __init__(self):
|
|
self.entries = array.array(self.type_code,
|
|
[0 for i in range(self.num_entries)])
|
|
|
|
def get_binary(self):
|
|
"""Return a bytearray representation of this table"""
|
|
# Always little-endian
|
|
ctype = "<" + self.type_code
|
|
entry_size = struct.calcsize(ctype)
|
|
ret = bytearray(entry_size * self.num_entries)
|
|
|
|
for i in range(self.num_entries):
|
|
struct.pack_into(ctype, ret, entry_size * i, self.entries[i])
|
|
return ret
|
|
|
|
@property
|
|
def supported_flags(self):
|
|
"""Class property indicating what flag bits are supported"""
|
|
raise NotImplementedError()
|
|
|
|
@property
|
|
def addr_shift(self):
|
|
"""Class property for how much to shift virtual addresses to obtain
|
|
the appropriate index in the table for it"""
|
|
raise NotImplementedError()
|
|
|
|
@property
|
|
def addr_mask(self):
|
|
"""Mask to apply to an individual entry to get the physical address
|
|
mapping"""
|
|
raise NotImplementedError()
|
|
|
|
@property
|
|
def type_code(self):
|
|
"""Struct packing letter code for table entries. Either I for
|
|
32-bit entries, or Q for PAE/IA-32e"""
|
|
raise NotImplementedError()
|
|
|
|
@property
|
|
def num_entries(self):
|
|
"""Number of entries in the table. Varies by table type and paging
|
|
mode"""
|
|
raise NotImplementedError()
|
|
|
|
def entry_index(self, virt_addr):
|
|
"""Get the index of the entry in this table that corresponds to the
|
|
provided virtual address"""
|
|
return (virt_addr >> self.addr_shift) & (self.num_entries - 1)
|
|
|
|
def has_entry(self, virt_addr):
|
|
"""Indicate whether an entry is present in this table for the provided
|
|
virtual address"""
|
|
index = self.entry_index(virt_addr)
|
|
|
|
return (self.entries[index] & FLAG_P) != 0
|
|
|
|
def lookup(self, virt_addr):
|
|
"""Look up the physical mapping for a virtual address.
|
|
|
|
If this is a leaf table, this is the physical address mapping. If not,
|
|
this is the physical address of the next level table"""
|
|
index = self.entry_index(virt_addr)
|
|
|
|
return self.entries[index] & self.addr_mask
|
|
|
|
def map(self, virt_addr, phys_addr, entry_flags):
|
|
"""For the table entry corresponding to the provided virtual address,
|
|
set the corresponding physical entry in the table. Unsupported flags
|
|
will be filtered out.
|
|
|
|
If this is a leaf table, this is the physical address mapping. If not,
|
|
this is the physical address of the next level table"""
|
|
index = self.entry_index(virt_addr)
|
|
|
|
self.entries[index] = ((phys_addr & self.addr_mask) |
|
|
(entry_flags & self.supported_flags))
|
|
|
|
def set_perms(self, virt_addr, entry_flags):
|
|
""""For the table entry corresponding to the provided virtual address,
|
|
update just the flags, leaving the physical mapping alone.
|
|
Unsupported flags will be filtered out."""
|
|
index = self.entry_index(virt_addr)
|
|
|
|
self.entries[index] = ((self.entries[index] & self.addr_mask) |
|
|
(entry_flags & self.supported_flags))
|
|
|
|
|
|
# Specific supported table types
|
|
class Pml4(MMUTable):
|
|
"""Page mapping level 4 for IA-32e"""
|
|
addr_shift = 39
|
|
addr_mask = 0x7FFFFFFFFFFFF000
|
|
type_code = 'Q'
|
|
num_entries = 512
|
|
supported_flags = INT_FLAGS
|
|
|
|
class Pdpt(MMUTable):
|
|
"""Page directory pointer table for IA-32e"""
|
|
addr_shift = 30
|
|
addr_mask = 0x7FFFFFFFFFFFF000
|
|
type_code = 'Q'
|
|
num_entries = 512
|
|
supported_flags = INT_FLAGS
|
|
|
|
class PdptPAE(Pdpt):
|
|
"""Page directory pointer table for PAE"""
|
|
num_entries = 4
|
|
|
|
class Pd(MMUTable):
|
|
"""Page directory for 32-bit"""
|
|
addr_shift = 22
|
|
addr_mask = 0xFFFFF000
|
|
type_code = 'I'
|
|
num_entries = 1024
|
|
supported_flags = INT_FLAGS
|
|
|
|
class PdXd(Pd):
|
|
"""Page directory for either PAE or IA-32e"""
|
|
addr_shift = 21
|
|
addr_mask = 0x7FFFFFFFFFFFF000
|
|
num_entries = 512
|
|
type_code = 'Q'
|
|
|
|
class Pt(MMUTable):
|
|
"""Page table for 32-bit"""
|
|
addr_shift = 12
|
|
addr_mask = 0xFFFFF000
|
|
type_code = 'I'
|
|
num_entries = 1024
|
|
supported_flags = (FLAG_P | FLAG_RW | FLAG_US | FLAG_G |
|
|
FLAG_IGNORED0 | FLAG_IGNORED1)
|
|
|
|
class PtXd(Pt):
|
|
"""Page table for either PAE or IA-32e"""
|
|
addr_mask = 0x07FFFFFFFFFFF000
|
|
type_code = 'Q'
|
|
num_entries = 512
|
|
supported_flags = (FLAG_P | FLAG_RW | FLAG_US | FLAG_G | FLAG_XD |
|
|
FLAG_IGNORED0 | FLAG_IGNORED1 | FLAG_IGNORED2)
|
|
|
|
|
|
class PtableSet(object):
|
|
"""Represents a complete set of page tables for any paging mode"""
|
|
|
|
def __init__(self, pages_start):
|
|
"""Instantiate a set of page tables which will be located in the
|
|
image starting at the provided physical memory location"""
|
|
self.page_pos = pages_start
|
|
self.toplevel = self.levels[0]()
|
|
|
|
debug("%s starting at physical address 0x%x" %
|
|
(self.__class__.__name__, pages_start))
|
|
|
|
# Database of page table pages. Maps physical memory address to
|
|
# MMUTable objects, excluding the top-level table which is tracked
|
|
# separately. Starts out empty as we haven't mapped anything and
|
|
# the top-level table is tracked separately.
|
|
self.tables = {}
|
|
|
|
def get_new_mmutable_addr(self):
|
|
"""If we need to instantiate a new MMUTable, return a physical
|
|
address location for it"""
|
|
ret = self.page_pos
|
|
self.page_pos += 4096
|
|
return ret
|
|
|
|
@property
|
|
def levels(self):
|
|
"""Class hierarchy of paging levels, with the first entry being
|
|
the toplevel table class, and the last entry always being
|
|
some kind of leaf page table class (Pt or PtXd)"""
|
|
raise NotImplementedError()
|
|
|
|
def new_child_table(self, table, virt_addr, depth):
|
|
new_table_addr = self.get_new_mmutable_addr()
|
|
new_table = self.levels[depth]()
|
|
debug("new %s at physical addr 0x%x"
|
|
% (self.levels[depth].__name__, new_table_addr))
|
|
self.tables[new_table_addr] = new_table
|
|
table.map(virt_addr, new_table_addr, INT_FLAGS)
|
|
|
|
return new_table
|
|
|
|
def map_page(self, virt_addr, phys_addr, flags, reserve):
|
|
"""Map a virtual address to a physical address in the page tables,
|
|
with provided access flags"""
|
|
table = self.toplevel
|
|
|
|
# Create and link up intermediate tables if necessary
|
|
for depth in range(1, len(self.levels)):
|
|
# Create child table if needed
|
|
if not table.has_entry(virt_addr):
|
|
table = self.new_child_table(table, virt_addr, depth)
|
|
else:
|
|
table = self.tables[table.lookup(virt_addr)]
|
|
|
|
# Set up entry in leaf page table
|
|
if not reserve:
|
|
table.map(virt_addr, phys_addr, flags)
|
|
|
|
def reserve(self, virt_base, size):
|
|
debug("Reserving paging structures 0x%x (%d)" %
|
|
(virt_base, size))
|
|
|
|
align_check(virt_base, size)
|
|
|
|
# How much memory is covered by leaf page table
|
|
scope = 1 << self.levels[-2].addr_shift
|
|
|
|
if virt_base % scope != 0:
|
|
error("misaligned virtual address space, 0x%x not a multiple of 0x%x" %
|
|
(virt_base, scope))
|
|
|
|
for addr in range(virt_base, virt_base + size, scope):
|
|
self.map_page(addr, 0, 0, True)
|
|
|
|
def map(self, phys_base, size, flags):
|
|
"""Identity map an address range in the page tables, with provided
|
|
access flags.
|
|
"""
|
|
debug("Identity-mapping 0x%x (%d): %s" %
|
|
(phys_base, size, dump_flags(flags)))
|
|
|
|
align_check(phys_base, size)
|
|
for addr in range(phys_base, phys_base + size, 4096):
|
|
if addr == 0:
|
|
# Never map the NULL page
|
|
continue
|
|
|
|
self.map_page(addr, addr, flags, False)
|
|
|
|
def set_region_perms(self, name, flags):
|
|
"""Set access permissions for a named region that is already mapped
|
|
|
|
The bounds of the region will be looked up in the symbol table
|
|
with _start and _size suffixes. The physical address mapping
|
|
is unchanged and this will not disturb any double-mapping."""
|
|
|
|
# Doesn't matter if this is a virtual address, we have a
|
|
# either dual mapping or it's the same as physical
|
|
base = syms[name + "_start"]
|
|
size = syms[name + "_size"]
|
|
|
|
debug("change flags for %s at 0x%x (%d): %s" %
|
|
(name, base, size, dump_flags(flags)))
|
|
align_check(base, size)
|
|
|
|
try:
|
|
for addr in range(base, base + size, 4096):
|
|
# Never map the NULL page
|
|
if addr == 0:
|
|
continue
|
|
|
|
table = self.toplevel
|
|
for _ in range(1, len(self.levels)):
|
|
table = self.tables[table.lookup(addr)]
|
|
table.set_perms(addr, flags)
|
|
except KeyError:
|
|
error("no mapping for %s region 0x%x (size 0x%x)" %
|
|
(name, base, size))
|
|
|
|
def write_output(self, filename):
|
|
"""Write the page tables to the output file in binary format"""
|
|
with open(filename, "wb") as fp:
|
|
for addr in sorted(self.tables):
|
|
mmu_table = self.tables[addr]
|
|
fp.write(mmu_table.get_binary())
|
|
|
|
# We always have the top-level table be last. This is because
|
|
# in PAE, the top-level PDPT has only 4 entries and is not a
|
|
# full page in size. We do not put it in the tables dictionary
|
|
# and treat it as a special case.
|
|
debug("top-level %s at physical addr 0x%x" %
|
|
(self.toplevel.__class__.__name__,
|
|
self.get_new_mmutable_addr()))
|
|
fp.write(self.toplevel.get_binary())
|
|
|
|
# Paging mode classes, we'll use one depending on configuration
|
|
class Ptables32bit(PtableSet):
|
|
levels = [Pd, Pt]
|
|
|
|
class PtablesPAE(PtableSet):
|
|
levels = [PdptPAE, PdXd, PtXd]
|
|
|
|
class PtablesIA32e(PtableSet):
|
|
levels = [Pml4, Pdpt, PdXd, PtXd]
|
|
|
|
|
|
def parse_args():
|
|
global args
|
|
parser = argparse.ArgumentParser(
|
|
description=__doc__,
|
|
formatter_class=argparse.RawDescriptionHelpFormatter)
|
|
|
|
parser.add_argument("-k", "--kernel", required=True,
|
|
help="path to prebuilt kernel ELF binary")
|
|
parser.add_argument("-o", "--output", required=True,
|
|
help="output file")
|
|
parser.add_argument("-v", "--verbose", action="store_true",
|
|
help="Print extra debugging information")
|
|
args = parser.parse_args()
|
|
if "VERBOSE" in os.environ:
|
|
args.verbose = True
|
|
|
|
|
|
def get_symbols(obj):
|
|
for section in obj.iter_sections():
|
|
if isinstance(section, SymbolTableSection):
|
|
return {sym.name: sym.entry.st_value
|
|
for sym in section.iter_symbols()}
|
|
|
|
raise LookupError("Could not find symbol table")
|
|
|
|
def isdef(sym_name):
|
|
return sym_name in syms
|
|
|
|
def main():
|
|
global syms
|
|
parse_args()
|
|
|
|
with open(args.kernel, "rb") as fp:
|
|
kernel = ELFFile(fp)
|
|
syms = get_symbols(kernel)
|
|
|
|
if isdef("CONFIG_X86_64"):
|
|
pclass = PtablesIA32e
|
|
elif isdef("CONFIG_X86_PAE"):
|
|
pclass = PtablesPAE
|
|
else:
|
|
pclass = Ptables32bit
|
|
|
|
debug("building %s" % pclass.__name__)
|
|
|
|
vm_base = syms["CONFIG_KERNEL_VM_BASE"]
|
|
vm_size = syms["CONFIG_KERNEL_VM_SIZE"]
|
|
|
|
if isdef("CONFIG_ARCH_MAPS_ALL_RAM"):
|
|
image_base = syms["CONFIG_SRAM_BASE_ADDRESS"]
|
|
image_size = syms["CONFIG_SRAM_SIZE"] * 1024
|
|
else:
|
|
image_base = syms["z_mapped_start"]
|
|
image_size = syms["z_mapped_size"]
|
|
ptables_phys = syms["z_x86_pagetables_start"]
|
|
|
|
debug("Address space: 0x%x - 0x%x size %x" %
|
|
(vm_base, vm_base + vm_size, vm_size))
|
|
|
|
debug("Zephyr image: 0x%x - 0x%x size %x" %
|
|
(image_base, image_base + image_size, image_size))
|
|
|
|
is_perm_regions = isdef("CONFIG_SRAM_REGION_PERMISSIONS")
|
|
|
|
if image_size >= vm_size:
|
|
error("VM size is too small (have 0x%x need more than 0x%x)" % (vm_size, image_size))
|
|
|
|
if is_perm_regions:
|
|
# Don't allow execution by default for any pages. We'll adjust this
|
|
# in later calls to pt.set_region_perms()
|
|
map_flags = FLAG_P | ENTRY_XD
|
|
else:
|
|
map_flags = FLAG_P
|
|
|
|
pt = pclass(ptables_phys)
|
|
# Instantiate all the paging structures for the address space
|
|
pt.reserve(vm_base, vm_size)
|
|
# Map the zephyr image
|
|
pt.map(image_base, image_size, map_flags | ENTRY_RW)
|
|
|
|
if isdef("CONFIG_X86_64"):
|
|
# 64-bit has a special region in the first 64K to bootstrap other CPUs
|
|
# from real mode
|
|
locore_base = syms["_locore_start"]
|
|
locore_size = syms["_lodata_end"] - locore_base
|
|
debug("Base addresses: physical 0x%x size %d" % (locore_base,
|
|
locore_size))
|
|
pt.map(locore_base, locore_size, map_flags | ENTRY_RW)
|
|
|
|
if isdef("CONFIG_XIP"):
|
|
# Additionally identity-map all ROM as read-only
|
|
pt.map(syms["CONFIG_FLASH_BASE_ADDRESS"],
|
|
syms["CONFIG_FLASH_SIZE"] * 1024, map_flags)
|
|
|
|
# Adjust mapped region permissions if configured
|
|
if is_perm_regions:
|
|
# Need to accomplish the following things:
|
|
# - Text regions need the XD flag cleared and RW flag removed
|
|
# if not built with gdbstub support
|
|
# - Rodata regions need the RW flag cleared
|
|
# - User mode needs access as we currently do not separate application
|
|
# text/rodata from kernel text/rodata
|
|
if isdef("CONFIG_GDBSTUB"):
|
|
pt.set_region_perms("_image_text", FLAG_P | ENTRY_US | ENTRY_RW)
|
|
else:
|
|
pt.set_region_perms("_image_text", FLAG_P | ENTRY_US)
|
|
pt.set_region_perms("_image_rodata", FLAG_P | ENTRY_US | ENTRY_XD)
|
|
|
|
if isdef("CONFIG_COVERAGE_GCOV") and isdef("CONFIG_USERSPACE"):
|
|
# If GCOV is enabled, user mode must be able to write to its
|
|
# common data area
|
|
pt.set_region_perms("__gcov_bss",
|
|
FLAG_P | ENTRY_RW | ENTRY_US | ENTRY_XD)
|
|
|
|
if isdef("CONFIG_X86_64"):
|
|
# Set appropriate permissions for locore areas much like we did
|
|
# with the main text/rodata regions
|
|
|
|
if isdef("CONFIG_X86_KPTI"):
|
|
# Set the User bit for the read-only locore/lorodata areas.
|
|
# This ensures they get mapped into the User page tables if
|
|
# KPTI is turned on. There is no sensitive data in them, and
|
|
# they contain text/data needed to take an exception or
|
|
# interrupt.
|
|
flag_user = ENTRY_US
|
|
else:
|
|
flag_user = 0
|
|
|
|
pt.set_region_perms("_locore", FLAG_P | flag_user)
|
|
pt.set_region_perms("_lorodata", FLAG_P | ENTRY_XD | flag_user)
|
|
|
|
pt.write_output(args.output)
|
|
|
|
if __name__ == "__main__":
|
|
main()
|