
The overhead of coroutine processes is fairly high. A clock driver
implemented through a coroutine process is mostly overhead. This was
partially addressed in commit 2398b792
by micro-optimizing how coroutine processes yield.
This commit eliminates the coroutine process overhead completely by
introducing dedicated clock processes. It also reduces the clock driver
logic to a plain toggle of the clock signal.
This change improves runtime by about 12% on Minerva SRAM SoC.
36 lines
802 B
Python
36 lines
802 B
Python
import inspect
|
|
|
|
from ._core import Process
|
|
|
|
|
|
# Names exported by `from <this module> import *`: only the clock process class.
__all__ = ["PyClockProcess"]
|
|
|
|
|
|
class PyClockProcess(Process):
    """Dedicated process that drives a one-bit clock signal by toggling it.

    The first call to :meth:`run` after a reset only schedules a wake-up
    ``phase`` later; every later call inverts the signal's current value and
    schedules the next wake-up ``period / 2`` later.

    Parameters
    ----------
    state
        Simulator state object; must provide ``get_signal()``, ``slots`` and
        ``timeline.delay()``.
    signal
        The clock signal to drive. It must be exactly one bit wide.
    phase
        Delay before the first toggle (units are whatever
        ``state.timeline.delay`` expects -- presumably seconds; confirm
        against the timeline implementation).
    period
        Full clock period, in the same units as ``phase``; the signal is
        toggled every half period.
    """

    def __init__(self, state, signal, *, phase, period):
        # Internal invariant: a clock is a single bit that can be inverted.
        assert len(signal) == 1

        self.state = state
        self.slot = state.get_signal(signal)
        self.phase = phase
        self.period = period

        self.reset()

    def reset(self):
        """Return the process to its initial, runnable, passive condition."""
        self.runnable = self.passive = self.initial = True

    def run(self):
        """Perform one step: wait out the phase, or toggle the clock.

        In both cases the process reschedules itself on the timeline and then
        marks itself as not runnable until that wake-up.
        """
        if self.initial:
            # First activation: do not touch the signal, just wait for the
            # phase offset to elapse.
            self.initial = False
            interval = self.phase
        else:
            # Subsequent activations: invert the clock and wait half a period.
            clock = self.state.slots[self.slot]
            clock.set(not clock.curr)
            interval = self.period / 2

        self.state.timeline.delay(interval, self)
        self.runnable = False
|