(zilch lang ninja): improve parser performance
This commit is contained in:
parent
499bacd9c8
commit
0cf6d7a9af
1 changed files with 73 additions and 41 deletions
|
|
@ -60,6 +60,13 @@
|
|||
(and (char>=? ch #\0) (char<=? ch #\9))
|
||||
(char=? ch #\_) (char=? ch #\-)
|
||||
(and (not simple) (char=? ch #\.))))
|
||||
(define (is-valid-varname-byte ch simple)
|
||||
(or
|
||||
(and (>= ch #x61) (<= ch #x7A))
|
||||
(and (>= ch #x41) (<= ch #x5A))
|
||||
(and (>= ch #x30) (<= ch #x39))
|
||||
(= ch #x5F) (= ch #x2D)
|
||||
(and (not simple) (= ch #x2E))))
|
||||
|
||||
(define (is-valid-eval-string-char ch path)
|
||||
(not
|
||||
|
|
@ -74,6 +81,19 @@
|
|||
(char=? ch #\:)
|
||||
(char=? ch #\|))))))
|
||||
|
||||
(define (is-valid-eval-string-byte ch path)
|
||||
(not
|
||||
(or
|
||||
(= ch 0)
|
||||
(= ch #x0A)
|
||||
(= ch #x0D)
|
||||
(= ch #x24)
|
||||
(and path
|
||||
(or
|
||||
(= ch #x20)
|
||||
(= ch #x3A)
|
||||
(= ch #x7C))))))
|
||||
|
||||
(define (resolve-evalstring evalstr file rule edge is-command)
|
||||
(define (find-value val)
|
||||
(define res #f)
|
||||
|
|
@ -198,55 +218,67 @@
|
|||
(try-resolve (build-rule-rspfile rule) #f)
|
||||
(try-resolve (build-rule-rspfile-content rule) #f)))
|
||||
|
||||
(define (bytevector-prefix? bv in-bv index)
|
||||
(define bv-len (bytevector-length bv))
|
||||
(define (inner-loop j)
|
||||
(cond
|
||||
((>= j bv-len) #t)
|
||||
((= (bytevector-u8-ref bv j) (bytevector-u8-ref in-bv (+ index j)))
|
||||
(inner-loop (+ j 1)))
|
||||
(else #f)))
|
||||
(if (>= (+ index bv-len) (bytevector-length in-bv))
|
||||
#f
|
||||
(inner-loop 0)))
|
||||
|
||||
;; Reads a full Ninja file, returning a <build-file> record.
|
||||
(define (read-ninja-file strval)
|
||||
(define i 0)
|
||||
(define (eat-whitespace)
|
||||
(cond
|
||||
((>= i (string-length strval)))
|
||||
((string-prefix? "$\r\n" strval 0 3 i) (set! i (+ i 3)) (eat-whitespace))
|
||||
((string-prefix? "$\n" strval 0 2 i) (set! i (+ i 2)) (eat-whitespace))
|
||||
((char=? (string-ref strval i) #\space) (set! i (string-skip strval (lambda (ch) (char=? ch #\space)) i)) (eat-whitespace))))
|
||||
((>= i (bytevector-length strval)))
|
||||
((bytevector-prefix? #u8(#x24 #x0D #x0A) strval i) (set! i (+ i 3)) (eat-whitespace))
|
||||
((bytevector-prefix? #u8(#x24 #x0A) strval i) (set! i (+ i 2)) (eat-whitespace))
|
||||
((= (bytevector-u8-ref strval i) #x20) (set! i (bytestring-index strval (lambda (ch) (not (= ch #x20))) i)) (eat-whitespace))))
|
||||
(define (read-eval-string-text path)
|
||||
(define collected '())
|
||||
(define start-i i)
|
||||
(define (tick)
|
||||
(cond
|
||||
((>= i (string-length strval)) (error "unexpected EOF"))
|
||||
((is-valid-eval-string-char (string-ref strval i) path)
|
||||
((>= i (bytevector-length strval)) (error "unexpected EOF"))
|
||||
((is-valid-eval-string-byte (bytevector-u8-ref strval i) path)
|
||||
(let ((start-i i))
|
||||
(do () ((or (>= i (string-length strval)) (not (is-valid-eval-string-char (string-ref strval i) path))))
|
||||
(do () ((or (>= i (bytevector-length strval)) (not (is-valid-eval-string-byte (bytevector-u8-ref strval i) path))))
|
||||
(set! i (+ i 1)))
|
||||
(set! collected (cons (string-copy strval start-i i) collected)))
|
||||
(set! collected (cons (utf8->string strval start-i i) collected)))
|
||||
(tick))
|
||||
((and path (member (string-ref strval i) '(#\space #\: #\| #\newline))))
|
||||
((and (not path) (char=? #\newline (string-ref strval i)))
|
||||
((and path (member (bytevector-u8-ref strval i) '(#x20 #x3A #x7C #x0A))))
|
||||
((and (not path) (= #x0A (bytevector-u8-ref strval i)))
|
||||
(set! i (+ i 1)))
|
||||
((string-prefix? "\r\n" strval 0 2 i)
|
||||
((bytevector-prefix? #u8(#x0D #x0A) strval i)
|
||||
(unless path
|
||||
(set! i (+ i 2))))
|
||||
((string-prefix? "$\r\n" strval 0 3 i)
|
||||
(set! i (string-skip strval (lambda (ch) (char=? ch #\space)) (+ i 3)))
|
||||
((bytevector-prefix? #u8(#x24 #x0D #x0A) strval i)
|
||||
(set! i (bytestring-index strval (lambda (ch) (not (= ch #x20))) (+ i 3)))
|
||||
(tick))
|
||||
((string-prefix? "$\n" strval 0 2 i)
|
||||
(set! i (string-skip strval (lambda (ch) (char=? ch #\space)) (+ i 2)))
|
||||
((bytevector-prefix? #u8(#x24 #x0A) strval i)
|
||||
(set! i (bytestring-index strval (lambda (ch) (not (= ch #x20))) (+ i 2)))
|
||||
(tick))
|
||||
((string-prefix? "${" strval 0 2 i)
|
||||
(let ((end-of-varname (string-skip strval (lambda (ch) (is-valid-varname-char ch #f)) (+ i 2))))
|
||||
(unless (char=? (string-ref strval end-of-varname) #\}) (error "unexpected non-varname character at index" end-of-varname))
|
||||
(set! collected (cons (cons 'varname (string-copy strval (+ i 2) end-of-varname)) collected))
|
||||
((bytevector-prefix? #u8(#x24 #x7B) strval i)
|
||||
(let ((end-of-varname (bytestring-index strval (lambda (ch) (not (is-valid-varname-byte ch #f))) (+ i 2))))
|
||||
(unless (= (bytevector-u8-ref strval end-of-varname) #x7D) (error "unexpected non-varname character at index" end-of-varname))
|
||||
(set! collected (cons (cons 'varname (utf8->string strval (+ i 2) end-of-varname)) collected))
|
||||
(set! i (+ end-of-varname 1)))
|
||||
(tick))
|
||||
((or (string-prefix? "$$" strval 0 2 i) (string-prefix? "$:" strval 0 2 i) (string-prefix? "$ " strval 0 2 i))
|
||||
(set! collected (cons (string-copy strval (+ i 1) (+ i 2)) collected))
|
||||
((or (bytevector-prefix? #u8(#x24 #x24) strval i) (bytevector-prefix? #u8(#x24 #x3A) strval i) (bytevector-prefix? #u8(#x24 #x20) strval i))
|
||||
(set! collected (cons (utf8->string strval (+ i 1) (+ i 2)) collected))
|
||||
(set! i (+ i 2))
|
||||
(tick))
|
||||
((and (char=? (string-ref strval i) #\$) (is-valid-varname-char (string-ref strval (+ i 1)) #t))
|
||||
(let ((end-of-varname (string-skip strval (lambda (ch) (is-valid-varname-char ch #t)) (+ i 1))))
|
||||
(set! collected (cons (cons 'varname (string-copy strval (+ i 1) end-of-varname)) collected))
|
||||
((and (= (bytevector-u8-ref strval i) #x24) (is-valid-varname-byte (bytevector-u8-ref strval (+ i 1)) #t))
|
||||
(let ((end-of-varname (bytestring-index strval (lambda (ch) (not (is-valid-varname-byte ch #t))) (+ i 1))))
|
||||
(set! collected (cons (cons 'varname (utf8->string strval (+ i 1) end-of-varname)) collected))
|
||||
(set! i end-of-varname))
|
||||
(tick))
|
||||
((char=? (string-ref strval i) #\$)
|
||||
((= (bytevector-u8-ref strval i) #x24)
|
||||
(error "bad $-escape at index" i))
|
||||
(else (error "Unknown character at index" i))))
|
||||
(tick)
|
||||
|
|
@ -263,41 +295,41 @@
|
|||
|
||||
(define (read-token)
|
||||
(define start-i i)
|
||||
(define post-space (or (string-skip strval (lambda (ch) (char=? ch #\space)) i) i))
|
||||
(define post-space (or (bytestring-index strval (lambda (ch) (not (= ch #x20))) i) i))
|
||||
(define token
|
||||
(cond
|
||||
((>= i (string-length strval))
|
||||
((>= i (bytevector-length strval))
|
||||
'eof)
|
||||
((string-prefix? "#" strval 0 1 post-space)
|
||||
(let ((end-of-comment (or (string-index strval (lambda (ch) (char=? ch #\newline)) post-space) (string-length strval))))
|
||||
((bytevector-prefix? #u8(#x23) strval post-space)
|
||||
(let ((end-of-comment (or (bytestring-index strval (lambda (ch) (= ch #x0A)) post-space) (bytevector-length strval))))
|
||||
(set! i end-of-comment)
|
||||
(read-token)))
|
||||
((string-prefix? "\r\n" strval 0 2 post-space)
|
||||
((bytevector-prefix? #u8(#x0D #x0A) strval post-space)
|
||||
(set! i (+ post-space 2))
|
||||
'newline)
|
||||
((string-prefix? "\n" strval 0 1 post-space)
|
||||
((bytevector-prefix? #u8(#x0A) strval post-space)
|
||||
(set! i (+ post-space 1))
|
||||
'newline)
|
||||
((> post-space i)
|
||||
'indent)
|
||||
((char=? (string-ref strval i) #\=)
|
||||
((= (bytevector-u8-ref strval i) #x3D)
|
||||
(set! i (+ i 1))
|
||||
'equals)
|
||||
((char=? (string-ref strval i) #\:)
|
||||
((= (bytevector-u8-ref strval i) #x3A)
|
||||
(set! i (+ i 1))
|
||||
'colon)
|
||||
((string-prefix? "|@" strval 0 2 i)
|
||||
((bytevector-prefix? #u8(#x7C #x40) strval i)
|
||||
(set! i (+ i 2))
|
||||
'pipe-at)
|
||||
((string-prefix? "||" strval 0 2 i)
|
||||
((bytevector-prefix? #u8(#x7C #x7C) strval i)
|
||||
(set! i (+ i 2))
|
||||
'pipe-pipe)
|
||||
((char=? (string-ref strval i) #\|)
|
||||
((= (bytevector-u8-ref strval i) #x7C)
|
||||
(set! i (+ i 1))
|
||||
'pipe)
|
||||
((is-valid-varname-char (string-ref strval i) #f)
|
||||
(let* ((token-end (string-skip strval (lambda (ch) (is-valid-varname-char ch #f)) i))
|
||||
(token (string-copy strval i token-end)))
|
||||
((is-valid-varname-byte (bytevector-u8-ref strval i) #f)
|
||||
(let* ((token-end (bytestring-index strval (lambda (ch) (not (is-valid-varname-byte ch #f))) i))
|
||||
(token (utf8->string strval i token-end)))
|
||||
(set! i token-end)
|
||||
(cond
|
||||
((string=? token "build") 'build)
|
||||
|
|
@ -401,7 +433,7 @@
|
|||
((pool) (error "todo: pool declaration"))
|
||||
((newline) (read-toplevel file))
|
||||
(else
|
||||
(unless (string? token) (error "unexpected" token))
|
||||
(unless (string? token) (error "unexpected" (list token i)))
|
||||
(unless (expect-token 'equals) (error "expected =, found" (read-token)))
|
||||
(let* ((value (read-eval-string-text #f)) (resolved (resolve-evalstring value file #f #f #f)))
|
||||
(set-build-file-global-variables! file (mapping-set! (build-file-global-variables file) token resolved)))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue