back.pysim: eliminate most dictionary lookups.

This makes the Glasgow test suite about 30% faster.
This commit is contained in:
whitequark 2018-12-18 15:28:27 +00:00
parent 7341d0d7ef
commit 7fa82a70be
3 changed files with 155 additions and 88 deletions

View file

@ -10,7 +10,7 @@ cache:
before_install: before_install:
- export PATH="/usr/lib/ccache:$HOME/.local/bin:$PATH" - export PATH="/usr/lib/ccache:$HOME/.local/bin:$PATH"
install: install:
- pip install coverage codecov pyvcd - pip install coverage codecov pyvcd bitarray
- git clone https://github.com/YosysHQ/yosys - git clone https://github.com/YosysHQ/yosys
- (cd yosys && if ! yosys -V || [ $(git rev-parse HEAD $(yosys -V | awk 'match($0,/sha1 ([0-9a-f]+)/,m) { print m[1] }') | uniq | wc -l) != 1 ]; then make CONFIG=gcc ENABLE_ABC=0 PREFIX=$HOME/.local install; fi) - (cd yosys && if ! yosys -V || [ $(git rev-parse HEAD $(yosys -V | awk 'match($0,/sha1 ([0-9a-f]+)/,m) { print m[1] }') | uniq | wc -l) != 1 ]; then make CONFIG=gcc ENABLE_ABC=0 PREFIX=$HOME/.local install; fi)
script: script:

View file

@ -1,6 +1,7 @@
import math import math
import inspect import inspect
from contextlib import contextmanager from contextlib import contextmanager
from bitarray import bitarray
from vcd import VCDWriter from vcd import VCDWriter
from vcd.gtkw import GTKWSave from vcd.gtkw import GTKWSave
@ -22,30 +23,50 @@ class _State:
def __init__(self): def __init__(self):
self.curr = [] self.curr = []
self.next = [] self.next = []
self.curr_dirty = SignalSet() self.curr_dirty = bitarray()
self.next_dirty = SignalSet() self.next_dirty = bitarray()
def add(self, signal, value): def add(self, value):
slot = len(self.curr) slot = len(self.curr)
self.curr.append(value) self.curr.append(value)
self.next.append(value) self.next.append(value)
self.curr_dirty.add(signal) self.curr_dirty.append(True)
self.next_dirty.append(False)
return slot return slot
def set(self, signal, slot, value): def set(self, slot, value):
if self.next[slot] != value: if self.next[slot] != value:
self.next_dirty.add(signal) self.next_dirty[slot] = True
self.next[slot] = value self.next[slot] = value
def commit(self, signal, slot): def commit(self, slot):
old_value = self.curr[slot] old_value = self.curr[slot]
new_value = self.next[slot] new_value = self.next[slot]
if old_value != new_value: if old_value != new_value:
self.next_dirty.remove(signal) self.next_dirty[slot] = False
self.curr_dirty.add(signal) self.curr_dirty[slot] = True
self.curr[slot] = new_value self.curr[slot] = new_value
return old_value, new_value return old_value, new_value
def flush_curr_dirty(self):
while True:
try:
slot = self.curr_dirty.index(True)
except ValueError:
break
self.curr_dirty[slot] = False
yield slot
def iter_next_dirty(self):
start = 0
while True:
try:
slot = self.next_dirty.index(True, start)
start = slot + 1
except ValueError:
break
yield slot
normalize = Const.normalize normalize = Const.normalize
@ -185,7 +206,7 @@ class _LHSValueCompiler(AbstractValueTransformer):
shape = value.shape() shape = value.shape()
value_slot = self.signal_slots[value] value_slot = self.signal_slots[value]
def eval(state, rhs): def eval(state, rhs):
state.set(value, value_slot, normalize(rhs, shape)) state.set(value_slot, normalize(rhs, shape))
return eval return eval
def on_ClockSignal(self, value): def on_ClockSignal(self, value):
@ -293,15 +314,17 @@ class Simulator:
def __init__(self, fragment, vcd_file=None, gtkw_file=None, traces=()): def __init__(self, fragment, vcd_file=None, gtkw_file=None, traces=()):
self._fragment = fragment self._fragment = fragment
self._signal_slots = SignalDict() # Signal -> int/slot
self._slot_signals = list() # int/slot -> Signal
self._domains = dict() # str/domain -> ClockDomain self._domains = dict() # str/domain -> ClockDomain
self._domain_triggers = SignalDict() # Signal -> str/domain self._domain_triggers = list() # int/slot -> str/domain
self._domain_signals = dict() # str/domain -> {Signal}
self._signals = SignalSet() # {Signal} self._signals = SignalSet() # {Signal}
self._comb_signals = SignalSet() # {Signal} self._comb_signals = bitarray() # {Signal}
self._sync_signals = SignalSet() # {Signal} self._sync_signals = bitarray() # {Signal}
self._user_signals = SignalSet() # {Signal} self._user_signals = bitarray() # {Signal}
self._signal_slots = SignalDict() # Signal -> int/slot self._domain_signals = dict() # str/domain -> {Signal}
self._started = False self._started = False
self._timestamp = 0. self._timestamp = 0.
@ -317,12 +340,12 @@ class Simulator:
self._wait_deadline = dict() # process -> float/timestamp self._wait_deadline = dict() # process -> float/timestamp
self._wait_tick = dict() # process -> str/domain self._wait_tick = dict() # process -> str/domain
self._funclets = SignalDict() # Signal -> set(lambda) self._funclets = list() # int/slot -> set(lambda)
self._vcd_file = vcd_file self._vcd_file = vcd_file
self._vcd_writer = None self._vcd_writer = None
self._vcd_signals = SignalDict() # signal -> set(vcd_signal) self._vcd_signals = list() # int/slot -> set(vcd_signal)
self._vcd_names = SignalDict() # signal -> str/name self._vcd_names = list() # int/slot -> str/name
self._gtkw_file = gtkw_file self._gtkw_file = gtkw_file
self._traces = traces self._traces = traces
@ -387,13 +410,7 @@ class Simulator:
comment="Generated by nMigen") comment="Generated by nMigen")
root_fragment = self._fragment.prepare() root_fragment = self._fragment.prepare()
self._domains = root_fragment.domains self._domains = root_fragment.domains
for domain, cd in self._domains.items():
self._domain_triggers[cd.clk] = domain
if cd.rst is not None:
self._domain_triggers[cd.rst] = domain
self._domain_signals[domain] = SignalSet()
hierarchy = {} hierarchy = {}
def add_fragment(fragment, scope=()): def add_fragment(fragment, scope=()):
@ -402,21 +419,48 @@ class Simulator:
add_fragment(subfragment, (*scope, name)) add_fragment(subfragment, (*scope, name))
add_fragment(root_fragment) add_fragment(root_fragment)
def add_signal(signal):
if signal not in self._signals:
self._signals.add(signal)
signal_slot = self._state.add(normalize(signal.reset, signal.shape()))
self._signal_slots[signal] = signal_slot
self._slot_signals.append(signal)
self._comb_signals.append(False)
self._sync_signals.append(False)
self._user_signals.append(False)
for domain in self._domains:
if domain not in self._domain_signals:
self._domain_signals[domain] = bitarray()
self._domain_signals[domain].append(False)
self._domain_triggers.append(None)
if self._vcd_writer:
self._vcd_signals.append(set())
self._vcd_names.append(None)
return self._signal_slots[signal]
def add_domain_signal(signal, domain):
signal_slot = add_signal(signal)
self._domain_triggers[signal_slot] = domain
for fragment, fragment_scope in hierarchy.items(): for fragment, fragment_scope in hierarchy.items():
for signal in fragment.iter_signals(): for signal in fragment.iter_signals():
if signal not in self._signals: add_signal(signal)
self._signals.add(signal)
signal_slot = self._state.add(signal, normalize(signal.reset, signal.shape())) for domain, cd in fragment.domains.items():
self._signal_slots[signal] = signal_slot add_domain_signal(cd.clk, domain)
if cd.rst is not None:
add_domain_signal(cd.rst, domain)
for fragment, fragment_scope in hierarchy.items(): for fragment, fragment_scope in hierarchy.items():
for signal in fragment.iter_signals(): for signal in fragment.iter_signals():
if not self._vcd_writer: if not self._vcd_writer:
continue continue
if signal not in self._vcd_signals: signal_slot = self._signal_slots[signal]
self._vcd_signals[signal] = set()
for subfragment, name in fragment.subfragments: for subfragment, name in fragment.subfragments:
if signal in subfragment.ports: if signal in subfragment.ports:
@ -441,21 +485,27 @@ class Simulator:
var_name_suffix = var_name var_name_suffix = var_name
else: else:
var_name_suffix = "{}${}".format(var_name, suffix) var_name_suffix = "{}${}".format(var_name, suffix)
self._vcd_signals[signal].add(self._vcd_writer.register_var( self._vcd_signals[signal_slot].add(self._vcd_writer.register_var(
scope=".".join(fragment_scope), name=var_name_suffix, scope=".".join(fragment_scope), name=var_name_suffix,
var_type=var_type, size=var_size, init=var_init)) var_type=var_type, size=var_size, init=var_init))
if signal not in self._vcd_names: if self._vcd_names[signal_slot] is None:
self._vcd_names[signal] = ".".join(fragment_scope + (var_name_suffix,)) self._vcd_names[signal_slot] = \
".".join(fragment_scope + (var_name_suffix,))
break break
except KeyError: except KeyError:
suffix = (suffix or 0) + 1 suffix = (suffix or 0) + 1
for domain, signals in fragment.drivers.items(): for domain, signals in fragment.drivers.items():
signals_bits = bitarray(len(self._signals))
signals_bits.setall(False)
for signal in signals:
signals_bits[self._signal_slots[signal]] = True
if domain is None: if domain is None:
self._comb_signals.update(signals) self._comb_signals |= signals_bits
else: else:
self._sync_signals.update(signals) self._sync_signals |= signals_bits
self._domain_signals[domain].update(signals) self._domain_signals[domain] |= signals_bits
statements = [] statements = []
for signal in fragment.iter_comb(): for signal in fragment.iter_comb():
@ -468,9 +518,10 @@ class Simulator:
funclet = compiler(statements) funclet = compiler(statements)
def add_funclet(signal, funclet): def add_funclet(signal, funclet):
if signal not in self._funclets: signal_slot = self._signal_slots[signal]
self._funclets[signal] = set() while len(self._funclets) <= signal_slot:
self._funclets[signal].add(funclet) self._funclets.append(set())
self._funclets[signal_slot].add(funclet)
for signal in compiler.sensitivity: for signal in compiler.sensitivity:
add_funclet(signal, funclet) add_funclet(signal, funclet)
@ -479,7 +530,10 @@ class Simulator:
if cd.rst is not None: if cd.rst is not None:
add_funclet(cd.rst, funclet) add_funclet(cd.rst, funclet)
self._user_signals = self._signals - self._comb_signals - self._sync_signals self._user_signals = bitarray(len(self._signals))
self._user_signals.setall(True)
self._user_signals &= ~self._comb_signals
self._user_signals &= ~self._sync_signals
return self return self
@ -489,30 +543,31 @@ class Simulator:
# that need their statements to be reevaluated because the signals changed at the previous # that need their statements to be reevaluated because the signals changed at the previous
# delta cycle. # delta cycle.
funclets = set() funclets = set()
while self._state.curr_dirty: for signal_slot in self._state.flush_curr_dirty():
signal = self._state.curr_dirty.pop() funclets.update(self._funclets[signal_slot])
if signal in self._funclets:
funclets.update(self._funclets[signal])
# Second, compute the values of all signals at the start of the next delta cycle, by # Second, compute the values of all signals at the start of the next delta cycle, by
# running precompiled statements. # running precompiled statements.
for funclet in funclets: for funclet in funclets:
funclet(self._state) funclet(self._state)
def _commit_signal(self, signal, domains): def _commit_signal(self, signal_slot, domains):
"""Perform the driver part of IR processes (aka RTLIL sync), for individual signals.""" """Perform the driver part of IR processes (aka RTLIL sync), for individual signals."""
# Take the computed value (at the start of this delta cycle) of a signal (that could have # Take the computed value (at the start of this delta cycle) of a signal (that could have
# come from an IR process that ran earlier, or modified by a simulator process) and update # come from an IR process that ran earlier, or modified by a simulator process) and update
# the value for this delta cycle. # the value for this delta cycle.
old, new = self._state.commit(signal, self._signal_slots[signal]) old, new = self._state.commit(signal_slot)
if old == new:
return
# If the signal is a clock that triggers synchronous logic, record that fact. # If the signal is a clock that triggers synchronous logic, record that fact.
if (old, new) == (0, 1) and signal in self._domain_triggers: if new == 1 and self._domain_triggers[signal_slot] is not None:
domains.add(self._domain_triggers[signal]) domains.add(self._domain_triggers[signal_slot])
if self._vcd_writer and old != new: if self._vcd_writer:
# Finally, dump the new value to the VCD file. # Finally, dump the new value to the VCD file.
for vcd_signal in self._vcd_signals[signal]: for vcd_signal in self._vcd_signals[signal_slot]:
signal = self._slot_signals[signal_slot]
if signal.decoder: if signal.decoder:
var_value = signal.decoder(new).replace(" ", "_") var_value = signal.decoder(new).replace(" ", "_")
else: else:
@ -524,9 +579,9 @@ class Simulator:
"""Perform the comb part of IR processes (aka RTLIL always).""" """Perform the comb part of IR processes (aka RTLIL always)."""
# Take the computed value (at the start of this delta cycle) of every comb signal and # Take the computed value (at the start of this delta cycle) of every comb signal and
# update the value for this delta cycle. # update the value for this delta cycle.
for signal in self._state.next_dirty: for signal_slot in self._state.iter_next_dirty():
if signal in self._comb_signals: if self._comb_signals[signal_slot]:
self._commit_signal(signal, domains) self._commit_signal(signal_slot, domains)
def _commit_sync_signals(self, domains): def _commit_sync_signals(self, domains):
"""Perform the sync part of IR processes (aka RTLIL posedge).""" """Perform the sync part of IR processes (aka RTLIL posedge)."""
@ -543,9 +598,9 @@ class Simulator:
# Take the computed value (at the start of this delta cycle) of every sync signal # Take the computed value (at the start of this delta cycle) of every sync signal
# in this domain and update the value for this delta cycle. This can trigger more # in this domain and update the value for this delta cycle. This can trigger more
# synchronous logic, so record that. # synchronous logic, so record that.
for signal in self._state.next_dirty: for signal_slot in self._state.iter_next_dirty():
if signal in self._domain_signals[domain]: if self._domain_signals[domain][signal_slot]:
self._commit_signal(signal, domains) self._commit_signal(signal_slot, domains)
# Wake up any simulator processes that wait for a domain tick. # Wake up any simulator processes that wait for a domain tick.
for process, wait_domain in list(self._wait_tick.items()): for process, wait_domain in list(self._wait_tick.items()):
@ -568,7 +623,7 @@ class Simulator:
try: try:
cmd = process.send(None) cmd = process.send(None)
while True: while True:
if isinstance(cmd, Delay): if type(cmd) is Delay:
if cmd.interval is None: if cmd.interval is None:
interval = self._epsilon interval = self._epsilon
else: else:
@ -577,42 +632,53 @@ class Simulator:
self._suspended.add(process) self._suspended.add(process)
break break
elif isinstance(cmd, Tick): elif type(cmd) is Tick:
self._wait_tick[process] = cmd.domain self._wait_tick[process] = cmd.domain
self._suspended.add(process) self._suspended.add(process)
break break
elif isinstance(cmd, Passive): elif type(cmd) is Passive:
self._passive.add(process) self._passive.add(process)
elif type(cmd) is Assign:
lhs_signals = cmd.lhs._lhs_signals()
for signal in lhs_signals:
signal_slot = self._signal_slots[signal]
if not signal in self._signals:
raise ValueError("Process '{}' sent a request to set signal '{!r}', "
"which is not a part of simulation"
.format(self._name_process(process), signal))
if self._comb_signals[signal_slot]:
raise ValueError("Process '{}' sent a request to set signal '{!r}', "
"which is a part of combinatorial assignment in "
"simulation"
.format(self._name_process(process), signal))
if type(cmd.lhs) is Signal and type(cmd.rhs) is Const:
# Fast path.
self._state.set(self._signal_slots[cmd.lhs],
normalize(cmd.rhs.value, cmd.lhs.shape()))
else:
compiler = _StatementCompiler(self._signal_slots)
funclet = compiler(cmd)
funclet(self._state)
domains = set()
for signal in lhs_signals:
self._commit_signal(self._signal_slots[signal], domains)
self._commit_sync_signals(domains)
elif type(cmd) is Signal:
# Fast path.
cmd = process.send(self._state.curr[self._signal_slots[cmd]])
continue
elif isinstance(cmd, Value): elif isinstance(cmd, Value):
compiler = _RHSValueCompiler(self._signal_slots) compiler = _RHSValueCompiler(self._signal_slots)
funclet = compiler(cmd) funclet = compiler(cmd)
cmd = process.send(funclet(self._state)) cmd = process.send(funclet(self._state))
continue continue
elif isinstance(cmd, Assign):
lhs_signals = cmd.lhs._lhs_signals()
for signal in lhs_signals:
if not signal in self._signals:
raise ValueError("Process '{}' sent a request to set signal '{!r}', "
"which is not a part of simulation"
.format(self._name_process(process), signal))
if signal in self._comb_signals:
raise ValueError("Process '{}' sent a request to set signal '{!r}', "
"which is a part of combinatorial assignment in "
"simulation"
.format(self._name_process(process), signal))
compiler = _StatementCompiler(self._signal_slots)
funclet = compiler(cmd)
funclet(self._state)
domains = set()
for signal in lhs_signals:
self._commit_signal(signal, domains)
self._commit_sync_signals(domains)
else: else:
raise TypeError("Received unsupported command '{!r}' from process '{}'" raise TypeError("Received unsupported command '{!r}' from process '{}'"
.format(cmd, self._name_process(process))) .format(cmd, self._name_process(process)))
@ -628,7 +694,7 @@ class Simulator:
def step(self, run_passive=False): def step(self, run_passive=False):
# Are there any delta cycles we should run? # Are there any delta cycles we should run?
if self._state.curr_dirty: if self._state.curr_dirty.any():
# We might run some delta cycles, and we have simulator processes waiting on # We might run some delta cycles, and we have simulator processes waiting on
# a deadline. Take care to not exceed the closest deadline. # a deadline. Take care to not exceed the closest deadline.
if self._wait_deadline and \ if self._wait_deadline and \
@ -638,7 +704,7 @@ class Simulator:
raise DeadlineError("Delta cycles exceeded process deadline; combinatorial loop?") raise DeadlineError("Delta cycles exceeded process deadline; combinatorial loop?")
domains = set() domains = set()
while self._state.curr_dirty: while self._state.curr_dirty.any():
self._update_dirty_signals() self._update_dirty_signals()
self._commit_comb_signals(domains) self._commit_comb_signals(domains)
self._commit_sync_signals(domains) self._commit_sync_signals(domains)
@ -694,12 +760,13 @@ class Simulator:
gtkw_save.zoom_markers(math.log(self._epsilon / self._fastest_clock) - 14) gtkw_save.zoom_markers(math.log(self._epsilon / self._fastest_clock) - 14)
def add_trace(signal, **kwargs): def add_trace(signal, **kwargs):
if signal in self._vcd_names: signal_slot = self._signal_slots[signal]
if self._vcd_names[signal_slot] is not None:
if len(signal) > 1: if len(signal) > 1:
suffix = "[{}:0]".format(len(signal) - 1) suffix = "[{}:0]".format(len(signal) - 1)
else: else:
suffix = "" suffix = ""
gtkw_save.trace(self._vcd_names[signal] + suffix, **kwargs) gtkw_save.trace(self._vcd_names[signal_slot] + suffix, **kwargs)
for domain, cd in self._domains.items(): for domain, cd in self._domains.items():
with gtkw_save.group("d.{}".format(domain)): with gtkw_save.group("d.{}".format(domain)):

View file

@ -14,7 +14,7 @@ setup(
description="Python toolbox for building complex digital hardware", description="Python toolbox for building complex digital hardware",
#long_description="""TODO""", #long_description="""TODO""",
license="BSD", license="BSD",
install_requires=["pyvcd"], install_requires=["pyvcd", "bitarray"],
packages=find_packages(), packages=find_packages(),
project_urls={ project_urls={
#"Documentation": "https://glasgow.readthedocs.io/", #"Documentation": "https://glasgow.readthedocs.io/",