(zilch lang ninja): improve parser performance

This commit is contained in:
puck 2025-05-01 13:20:05 +00:00
parent 499bacd9c8
commit 0cf6d7a9af

View file

@ -60,6 +60,13 @@
(and (char>=? ch #\0) (char<=? ch #\9)) (and (char>=? ch #\0) (char<=? ch #\9))
(char=? ch #\_) (char=? ch #\-) (char=? ch #\_) (char=? ch #\-)
(and (not simple) (char=? ch #\.)))) (and (not simple) (char=? ch #\.))))
(define (is-valid-varname-byte ch simple)
(or
(and (>= ch #x61) (<= ch #x7A))
(and (>= ch #x41) (<= ch #x5A))
(and (>= ch #x30) (<= ch #x39))
(= ch #x5F) (= ch #x2D)
(and (not simple) (= ch #x2E))))
(define (is-valid-eval-string-char ch path) (define (is-valid-eval-string-char ch path)
(not (not
@ -74,6 +81,19 @@
(char=? ch #\:) (char=? ch #\:)
(char=? ch #\|)))))) (char=? ch #\|))))))
(define (is-valid-eval-string-byte ch path)
(not
(or
(= ch 0)
(= ch #x0A)
(= ch #x0D)
(= ch #x24)
(and path
(or
(= ch #x20)
(= ch #x3A)
(= ch #x7C))))))
(define (resolve-evalstring evalstr file rule edge is-command) (define (resolve-evalstring evalstr file rule edge is-command)
(define (find-value val) (define (find-value val)
(define res #f) (define res #f)
@ -198,55 +218,67 @@
(try-resolve (build-rule-rspfile rule) #f) (try-resolve (build-rule-rspfile rule) #f)
(try-resolve (build-rule-rspfile-content rule) #f))) (try-resolve (build-rule-rspfile-content rule) #f)))
(define (bytevector-prefix? bv in-bv index)
(define bv-len (bytevector-length bv))
(define (inner-loop j)
(cond
((>= j bv-len) #t)
((= (bytevector-u8-ref bv j) (bytevector-u8-ref in-bv (+ index j)))
(inner-loop (+ j 1)))
(else #f)))
(if (>= (+ index bv-len) (bytevector-length in-bv))
#f
(inner-loop 0)))
;; Reads a full Ninja file, returning a <build-file> record. ;; Reads a full Ninja file, returning a <build-file> record.
(define (read-ninja-file strval) (define (read-ninja-file strval)
(define i 0) (define i 0)
(define (eat-whitespace) (define (eat-whitespace)
(cond (cond
((>= i (string-length strval))) ((>= i (bytevector-length strval)))
((string-prefix? "$\r\n" strval 0 3 i) (set! i (+ i 3)) (eat-whitespace)) ((bytevector-prefix? #u8(#x24 #x0D #x0A) strval i) (set! i (+ i 3)) (eat-whitespace))
((string-prefix? "$\n" strval 0 2 i) (set! i (+ i 2)) (eat-whitespace)) ((bytevector-prefix? #u8(#x24 #x0A) strval i) (set! i (+ i 2)) (eat-whitespace))
((char=? (string-ref strval i) #\space) (set! i (string-skip strval (lambda (ch) (char=? ch #\space)) i)) (eat-whitespace)))) ((= (bytevector-u8-ref strval i) #x20) (set! i (bytestring-index strval (lambda (ch) (not (= ch #x20))) i)) (eat-whitespace))))
(define (read-eval-string-text path) (define (read-eval-string-text path)
(define collected '()) (define collected '())
(define start-i i) (define start-i i)
(define (tick) (define (tick)
(cond (cond
((>= i (string-length strval)) (error "unexpected EOF")) ((>= i (bytevector-length strval)) (error "unexpected EOF"))
((is-valid-eval-string-char (string-ref strval i) path) ((is-valid-eval-string-byte (bytevector-u8-ref strval i) path)
(let ((start-i i)) (let ((start-i i))
(do () ((or (>= i (string-length strval)) (not (is-valid-eval-string-char (string-ref strval i) path)))) (do () ((or (>= i (bytevector-length strval)) (not (is-valid-eval-string-byte (bytevector-u8-ref strval i) path))))
(set! i (+ i 1))) (set! i (+ i 1)))
(set! collected (cons (string-copy strval start-i i) collected))) (set! collected (cons (utf8->string strval start-i i) collected)))
(tick)) (tick))
((and path (member (string-ref strval i) '(#\space #\: #\| #\newline)))) ((and path (member (bytevector-u8-ref strval i) '(#x20 #x3A #x7C #x0A))))
((and (not path) (char=? #\newline (string-ref strval i))) ((and (not path) (= #x0A (bytevector-u8-ref strval i)))
(set! i (+ i 1))) (set! i (+ i 1)))
((string-prefix? "\r\n" strval 0 2 i) ((bytevector-prefix? #u8(#x0D #x0A) strval i)
(unless path (unless path
(set! i (+ i 2)))) (set! i (+ i 2))))
((string-prefix? "$\r\n" strval 0 3 i) ((bytevector-prefix? #u8(#x24 #x0D #x0A) strval i)
(set! i (string-skip strval (lambda (ch) (char=? ch #\space)) (+ i 3))) (set! i (bytestring-index strval (lambda (ch) (not (= ch #x20))) (+ i 3)))
(tick)) (tick))
((string-prefix? "$\n" strval 0 2 i) ((bytevector-prefix? #u8(#x24 #x0A) strval i)
(set! i (string-skip strval (lambda (ch) (char=? ch #\space)) (+ i 2))) (set! i (bytestring-index strval (lambda (ch) (not (= ch #x20))) (+ i 2)))
(tick)) (tick))
((string-prefix? "${" strval 0 2 i) ((bytevector-prefix? #u8(#x24 #x7B) strval i)
(let ((end-of-varname (string-skip strval (lambda (ch) (is-valid-varname-char ch #f)) (+ i 2)))) (let ((end-of-varname (bytestring-index strval (lambda (ch) (not (is-valid-varname-byte ch #f))) (+ i 2))))
(unless (char=? (string-ref strval end-of-varname) #\}) (error "unexpected non-varname character at index" end-of-varname)) (unless (= (bytevector-u8-ref strval end-of-varname) #x7D) (error "unexpected non-varname character at index" end-of-varname))
(set! collected (cons (cons 'varname (string-copy strval (+ i 2) end-of-varname)) collected)) (set! collected (cons (cons 'varname (utf8->string strval (+ i 2) end-of-varname)) collected))
(set! i (+ end-of-varname 1))) (set! i (+ end-of-varname 1)))
(tick)) (tick))
((or (string-prefix? "$$" strval 0 2 i) (string-prefix? "$:" strval 0 2 i) (string-prefix? "$ " strval 0 2 i)) ((or (bytevector-prefix? #u8(#x24 #x24) strval i) (bytevector-prefix? #u8(#x24 #x3A) strval i) (bytevector-prefix? #u8(#x24 #x20) strval i))
(set! collected (cons (string-copy strval (+ i 1) (+ i 2)) collected)) (set! collected (cons (utf8->string strval (+ i 1) (+ i 2)) collected))
(set! i (+ i 2)) (set! i (+ i 2))
(tick)) (tick))
((and (char=? (string-ref strval i) #\$) (is-valid-varname-char (string-ref strval (+ i 1)) #t)) ((and (= (bytevector-u8-ref strval i) #x24) (is-valid-varname-byte (bytevector-u8-ref strval (+ i 1)) #t))
(let ((end-of-varname (string-skip strval (lambda (ch) (is-valid-varname-char ch #t)) (+ i 1)))) (let ((end-of-varname (bytestring-index strval (lambda (ch) (not (is-valid-varname-byte ch #t))) (+ i 1))))
(set! collected (cons (cons 'varname (string-copy strval (+ i 1) end-of-varname)) collected)) (set! collected (cons (cons 'varname (utf8->string strval (+ i 1) end-of-varname)) collected))
(set! i end-of-varname)) (set! i end-of-varname))
(tick)) (tick))
((char=? (string-ref strval i) #\$) ((= (bytevector-u8-ref strval i) #x24)
(error "bad $-escape at index" i)) (error "bad $-escape at index" i))
(else (error "Unknown character at index" i)))) (else (error "Unknown character at index" i))))
(tick) (tick)
@ -263,41 +295,41 @@
(define (read-token) (define (read-token)
(define start-i i) (define start-i i)
(define post-space (or (string-skip strval (lambda (ch) (char=? ch #\space)) i) i)) (define post-space (or (bytestring-index strval (lambda (ch) (not (= ch #x20))) i) i))
(define token (define token
(cond (cond
((>= i (string-length strval)) ((>= i (bytevector-length strval))
'eof) 'eof)
((string-prefix? "#" strval 0 1 post-space) ((bytevector-prefix? #u8(#x23) strval post-space)
(let ((end-of-comment (or (string-index strval (lambda (ch) (char=? ch #\newline)) post-space) (string-length strval)))) (let ((end-of-comment (or (bytestring-index strval (lambda (ch) (= ch #x0A)) post-space) (bytevector-length strval))))
(set! i end-of-comment) (set! i end-of-comment)
(read-token))) (read-token)))
((string-prefix? "\r\n" strval 0 2 post-space) ((bytevector-prefix? #u8(#x0D #x0A) strval post-space)
(set! i (+ post-space 2)) (set! i (+ post-space 2))
'newline) 'newline)
((string-prefix? "\n" strval 0 1 post-space) ((bytevector-prefix? #u8(#x0A) strval post-space)
(set! i (+ post-space 1)) (set! i (+ post-space 1))
'newline) 'newline)
((> post-space i) ((> post-space i)
'indent) 'indent)
((char=? (string-ref strval i) #\=) ((= (bytevector-u8-ref strval i) #x3D)
(set! i (+ i 1)) (set! i (+ i 1))
'equals) 'equals)
((char=? (string-ref strval i) #\:) ((= (bytevector-u8-ref strval i) #x3A)
(set! i (+ i 1)) (set! i (+ i 1))
'colon) 'colon)
((string-prefix? "|@" strval 0 2 i) ((bytevector-prefix? #u8(#x7C #x40) strval i)
(set! i (+ i 2)) (set! i (+ i 2))
'pipe-at) 'pipe-at)
((string-prefix? "||" strval 0 2 i) ((bytevector-prefix? #u8(#x7C #x7C) strval i)
(set! i (+ i 2)) (set! i (+ i 2))
'pipe-pipe) 'pipe-pipe)
((char=? (string-ref strval i) #\|) ((= (bytevector-u8-ref strval i) #x7C)
(set! i (+ i 1)) (set! i (+ i 1))
'pipe) 'pipe)
((is-valid-varname-char (string-ref strval i) #f) ((is-valid-varname-byte (bytevector-u8-ref strval i) #f)
(let* ((token-end (string-skip strval (lambda (ch) (is-valid-varname-char ch #f)) i)) (let* ((token-end (bytestring-index strval (lambda (ch) (not (is-valid-varname-byte ch #f))) i))
(token (string-copy strval i token-end))) (token (utf8->string strval i token-end)))
(set! i token-end) (set! i token-end)
(cond (cond
((string=? token "build") 'build) ((string=? token "build") 'build)
@ -401,7 +433,7 @@
((pool) (error "todo: pool declaration")) ((pool) (error "todo: pool declaration"))
((newline) (read-toplevel file)) ((newline) (read-toplevel file))
(else (else
(unless (string? token) (error "unexpected" token)) (unless (string? token) (error "unexpected" (list token i)))
(unless (expect-token 'equals) (error "expected =, found" (read-token))) (unless (expect-token 'equals) (error "expected =, found" (read-token)))
(let* ((value (read-eval-string-text #f)) (resolved (resolve-evalstring value file #f #f #f))) (let* ((value (read-eval-string-text #f)) (resolved (resolve-evalstring value file #f #f #f)))
(set-build-file-global-variables! file (mapping-set! (build-file-global-variables file) token resolved))) (set-build-file-global-variables! file (mapping-set! (build-file-global-variables file) token resolved)))