(zilch lang ninja): improve parser performance

This commit is contained in:
puck 2025-05-01 13:20:05 +00:00
parent 499bacd9c8
commit 0cf6d7a9af

View file

@ -60,6 +60,13 @@
(and (char>=? ch #\0) (char<=? ch #\9))
(char=? ch #\_) (char=? ch #\-)
(and (not simple) (char=? ch #\.))))
(define (is-valid-varname-byte ch simple)
(or
(and (>= ch #x61) (<= ch #x7A))
(and (>= ch #x41) (<= ch #x5A))
(and (>= ch #x30) (<= ch #x39))
(= ch #x5F) (= ch #x2D)
(and (not simple) (= ch #x2E))))
(define (is-valid-eval-string-char ch path)
(not
@ -74,6 +81,19 @@
(char=? ch #\:)
(char=? ch #\|))))))
(define (is-valid-eval-string-byte ch path)
(not
(or
(= ch 0)
(= ch #x0A)
(= ch #x0D)
(= ch #x24)
(and path
(or
(= ch #x20)
(= ch #x3A)
(= ch #x7C))))))
(define (resolve-evalstring evalstr file rule edge is-command)
(define (find-value val)
(define res #f)
@ -198,55 +218,67 @@
(try-resolve (build-rule-rspfile rule) #f)
(try-resolve (build-rule-rspfile-content rule) #f)))
(define (bytevector-prefix? bv in-bv index)
(define bv-len (bytevector-length bv))
(define (inner-loop j)
(cond
((>= j bv-len) #t)
((= (bytevector-u8-ref bv j) (bytevector-u8-ref in-bv (+ index j)))
(inner-loop (+ j 1)))
(else #f)))
(if (>= (+ index bv-len) (bytevector-length in-bv))
#f
(inner-loop 0)))
;; Reads a full Ninja file, returning a <build-file> record.
(define (read-ninja-file strval)
(define i 0)
(define (eat-whitespace)
(cond
((>= i (string-length strval)))
((string-prefix? "$\r\n" strval 0 3 i) (set! i (+ i 3)) (eat-whitespace))
((string-prefix? "$\n" strval 0 2 i) (set! i (+ i 2)) (eat-whitespace))
((char=? (string-ref strval i) #\space) (set! i (string-skip strval (lambda (ch) (char=? ch #\space)) i)) (eat-whitespace))))
((>= i (bytevector-length strval)))
((bytevector-prefix? #u8(#x24 #x0D #x0A) strval i) (set! i (+ i 3)) (eat-whitespace))
((bytevector-prefix? #u8(#x24 #x0A) strval i) (set! i (+ i 2)) (eat-whitespace))
((= (bytevector-u8-ref strval i) #x20) (set! i (bytestring-index strval (lambda (ch) (not (= ch #x20))) i)) (eat-whitespace))))
(define (read-eval-string-text path)
(define collected '())
(define start-i i)
(define (tick)
(cond
((>= i (string-length strval)) (error "unexpected EOF"))
((is-valid-eval-string-char (string-ref strval i) path)
((>= i (bytevector-length strval)) (error "unexpected EOF"))
((is-valid-eval-string-byte (bytevector-u8-ref strval i) path)
(let ((start-i i))
(do () ((or (>= i (string-length strval)) (not (is-valid-eval-string-char (string-ref strval i) path))))
(do () ((or (>= i (bytevector-length strval)) (not (is-valid-eval-string-byte (bytevector-u8-ref strval i) path))))
(set! i (+ i 1)))
(set! collected (cons (string-copy strval start-i i) collected)))
(set! collected (cons (utf8->string strval start-i i) collected)))
(tick))
((and path (member (string-ref strval i) '(#\space #\: #\| #\newline))))
((and (not path) (char=? #\newline (string-ref strval i)))
((and path (member (bytevector-u8-ref strval i) '(#x20 #x3A #x7C #x0A))))
((and (not path) (= #x0A (bytevector-u8-ref strval i)))
(set! i (+ i 1)))
((string-prefix? "\r\n" strval 0 2 i)
((bytevector-prefix? #u8(#x0D #x0A) strval i)
(unless path
(set! i (+ i 2))))
((string-prefix? "$\r\n" strval 0 3 i)
(set! i (string-skip strval (lambda (ch) (char=? ch #\space)) (+ i 3)))
((bytevector-prefix? #u8(#x24 #x0D #x0A) strval i)
(set! i (bytestring-index strval (lambda (ch) (not (= ch #x20))) (+ i 3)))
(tick))
((string-prefix? "$\n" strval 0 2 i)
(set! i (string-skip strval (lambda (ch) (char=? ch #\space)) (+ i 2)))
((bytevector-prefix? #u8(#x24 #x0A) strval i)
(set! i (bytestring-index strval (lambda (ch) (not (= ch #x20))) (+ i 2)))
(tick))
((string-prefix? "${" strval 0 2 i)
(let ((end-of-varname (string-skip strval (lambda (ch) (is-valid-varname-char ch #f)) (+ i 2))))
(unless (char=? (string-ref strval end-of-varname) #\}) (error "unexpected non-varname character at index" end-of-varname))
(set! collected (cons (cons 'varname (string-copy strval (+ i 2) end-of-varname)) collected))
((bytevector-prefix? #u8(#x24 #x7B) strval i)
(let ((end-of-varname (bytestring-index strval (lambda (ch) (not (is-valid-varname-byte ch #f))) (+ i 2))))
(unless (= (bytevector-u8-ref strval end-of-varname) #x7D) (error "unexpected non-varname character at index" end-of-varname))
(set! collected (cons (cons 'varname (utf8->string strval (+ i 2) end-of-varname)) collected))
(set! i (+ end-of-varname 1)))
(tick))
((or (string-prefix? "$$" strval 0 2 i) (string-prefix? "$:" strval 0 2 i) (string-prefix? "$ " strval 0 2 i))
(set! collected (cons (string-copy strval (+ i 1) (+ i 2)) collected))
((or (bytevector-prefix? #u8(#x24 #x24) strval i) (bytevector-prefix? #u8(#x24 #x3A) strval i) (bytevector-prefix? #u8(#x24 #x20) strval i))
(set! collected (cons (utf8->string strval (+ i 1) (+ i 2)) collected))
(set! i (+ i 2))
(tick))
((and (char=? (string-ref strval i) #\$) (is-valid-varname-char (string-ref strval (+ i 1)) #t))
(let ((end-of-varname (string-skip strval (lambda (ch) (is-valid-varname-char ch #t)) (+ i 1))))
(set! collected (cons (cons 'varname (string-copy strval (+ i 1) end-of-varname)) collected))
((and (= (bytevector-u8-ref strval i) #x24) (is-valid-varname-byte (bytevector-u8-ref strval (+ i 1)) #t))
(let ((end-of-varname (bytestring-index strval (lambda (ch) (not (is-valid-varname-byte ch #t))) (+ i 1))))
(set! collected (cons (cons 'varname (utf8->string strval (+ i 1) end-of-varname)) collected))
(set! i end-of-varname))
(tick))
((char=? (string-ref strval i) #\$)
((= (bytevector-u8-ref strval i) #x24)
(error "bad $-escape at index" i))
(else (error "Unknown character at index" i))))
(tick)
@ -263,41 +295,41 @@
(define (read-token)
(define start-i i)
(define post-space (or (string-skip strval (lambda (ch) (char=? ch #\space)) i) i))
(define post-space (or (bytestring-index strval (lambda (ch) (not (= ch #x20))) i) i))
(define token
(cond
((>= i (string-length strval))
((>= i (bytevector-length strval))
'eof)
((string-prefix? "#" strval 0 1 post-space)
(let ((end-of-comment (or (string-index strval (lambda (ch) (char=? ch #\newline)) post-space) (string-length strval))))
((bytevector-prefix? #u8(#x23) strval post-space)
(let ((end-of-comment (or (bytestring-index strval (lambda (ch) (= ch #x0A)) post-space) (bytevector-length strval))))
(set! i end-of-comment)
(read-token)))
((string-prefix? "\r\n" strval 0 2 post-space)
((bytevector-prefix? #u8(#x0D #x0A) strval post-space)
(set! i (+ post-space 2))
'newline)
((string-prefix? "\n" strval 0 1 post-space)
((bytevector-prefix? #u8(#x0A) strval post-space)
(set! i (+ post-space 1))
'newline)
((> post-space i)
'indent)
((char=? (string-ref strval i) #\=)
((= (bytevector-u8-ref strval i) #x3D)
(set! i (+ i 1))
'equals)
((char=? (string-ref strval i) #\:)
((= (bytevector-u8-ref strval i) #x3A)
(set! i (+ i 1))
'colon)
((string-prefix? "|@" strval 0 2 i)
((bytevector-prefix? #u8(#x7C #x40) strval i)
(set! i (+ i 2))
'pipe-at)
((string-prefix? "||" strval 0 2 i)
((bytevector-prefix? #u8(#x7C #x7C) strval i)
(set! i (+ i 2))
'pipe-pipe)
((char=? (string-ref strval i) #\|)
((= (bytevector-u8-ref strval i) #x7C)
(set! i (+ i 1))
'pipe)
((is-valid-varname-char (string-ref strval i) #f)
(let* ((token-end (string-skip strval (lambda (ch) (is-valid-varname-char ch #f)) i))
(token (string-copy strval i token-end)))
((is-valid-varname-byte (bytevector-u8-ref strval i) #f)
(let* ((token-end (bytestring-index strval (lambda (ch) (not (is-valid-varname-byte ch #f))) i))
(token (utf8->string strval i token-end)))
(set! i token-end)
(cond
((string=? token "build") 'build)
@ -401,7 +433,7 @@
((pool) (error "todo: pool declaration"))
((newline) (read-toplevel file))
(else
(unless (string? token) (error "unexpected" token))
(unless (string? token) (error "unexpected" (list token i)))
(unless (expect-token 'equals) (error "expected =, found" (read-token)))
(let* ((value (read-eval-string-text #f)) (resolved (resolve-evalstring value file #f #f #f)))
(set-build-file-global-variables! file (mapping-set! (build-file-global-variables file) token resolved)))