@@ -8,6 +8,18 @@ import (
88 "strings"
99)
1010
// scannerOptions holds the configuration collected from the ScannerOption
// values passed to NewScanner.
type scannerOptions struct {
	// extensions maps a directive name to the ScannerExt that takes over
	// tokenization when that directive is scanned.
	extensions map[string]ScannerExt
}
14+
// ScannerOption is a functional option that configures a Scanner created
// with NewScanner.
type ScannerOption interface {
	applyScannerOptions(options *scannerOptions)
}
18+
19+ type scannerOptionFunc func (* scannerOptions )
20+
21+ func (opt scannerOptionFunc ) applyScanner (opts * scannerOptions ) { opt (opts ) }
22+
1123// Token is a lexical token of the NGINX configuration syntax.
1224type Token struct {
1325 // Text is the string corresponding to the token. It could be a directive or symbol. The value is the actual token
@@ -20,6 +32,8 @@ type Token struct {
2032 IsQuoted bool
2133}
2234
35+ func (t Token ) String () string { return fmt .Sprintf ("{%d, %s, %t}" , t .Line , t .Text , t .IsQuoted ) }
36+
2337type scannerError struct {
2438 msg string
2539 line int
@@ -52,23 +66,33 @@ func LineNumber(err error) (int, bool) {
5266//
5367// Use NewScanner to construct a Scanner.
type Scanner struct {
	scanner              *bufio.Scanner // reads the source rune by rune (split with bufio.ScanRunes)
	lineno               int            // current line number in the source, starting at 1
	tokenStartLine       int            // line on which the token currently being scanned started
	tokenDepth           int            // current '{'/'}' block nesting depth
	repeateSpecialChar   bool           // only '}' can be repeated (NOTE(review): field name typo kept; other code references it)
	nextTokenIsDirective bool           // true when the next token can start a directive (set at EOL and after special chars)
	prev                 string         // previously scanned text — presumably used for lookback; confirm against Scan
	err                  error          // sticky error recorded by setErr and returned by Scan
	options              *scannerOptions // registered extensions, keyed by directive name
	ext                  Tokenizer      // active extension tokenizer; Scan drains it before resuming normal scanning
}
6380
6481// NewScanner returns a new Scanner to read from r.
65- func NewScanner (r io.Reader ) * Scanner {
82+ func NewScanner (r io.Reader , options ... ScannerOption ) * Scanner {
83+ opts := & scannerOptions {}
84+ for _ , opt := range options {
85+ opt .applyScannerOptions (opts )
86+ }
87+
6688 s := & Scanner {
67- scanner : bufio .NewScanner (r ),
68- lineno : 1 ,
69- tokenStartLine : 1 ,
70- tokenDepth : 0 ,
71- repeateSpecialChar : false ,
89+ scanner : bufio .NewScanner (r ),
90+ lineno : 1 ,
91+ tokenStartLine : 1 ,
92+ tokenDepth : 0 ,
93+ repeateSpecialChar : false ,
94+ nextTokenIsDirective : true ,
95+ options : opts ,
7296 }
7397
7498 s .scanner .Split (bufio .ScanRunes )
@@ -93,6 +117,20 @@ func (s *Scanner) setErr(err error) {
93117// Scan reads the next token from source and returns it.. It returns io.EOF at the end of the source. Scanner errors are
94118// returned when encountered.
95119func (s * Scanner ) Scan () (Token , error ) { //nolint: funlen, gocognit, gocyclo
120+ if s .ext != nil {
121+ t , err := s .ext .Next ()
122+ if err != nil {
123+ if ! errors .Is (err , TokenizerDone ) {
124+ s .setErr (err )
125+ return Token {}, s .err
126+ }
127+
128+ s .ext = nil
129+ } else {
130+ return t , nil
131+ }
132+ }
133+
96134 var tok strings.Builder
97135
98136 lexState := skipSpace
@@ -129,6 +167,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
129167 r = nextRune
130168 if isEOL (r ) {
131169 s .lineno ++
170+ s .nextTokenIsDirective = true
132171 }
133172 default :
134173 readNext = true
@@ -149,6 +188,16 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
149188 r = "\\ " + r
150189 }
151190
191+ if tok .Len () > 0 {
192+ t := tok .String ()
193+ if s .nextTokenIsDirective {
194+ if ext , ok := s .options .extensions [t ]; ok {
195+ s .ext = ext .Tokenizer (& SubScanner {parent : s , tokenLine : s .tokenStartLine }, t )
196+ return Token {Text : t , Line : s .tokenStartLine }, nil
197+ }
198+ }
199+ }
200+
152201 switch lexState {
153202 case skipSpace :
154203 if ! isSpace (r ) {
@@ -166,11 +215,13 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
166215 tok .WriteString (r )
167216 lexState = inComment
168217 s .tokenStartLine = s .lineno
218+ s .nextTokenIsDirective = false
169219 continue
170220 }
171221 }
172222
173223 if isSpace (r ) {
224+ s .nextTokenIsDirective = false
174225 return Token {Text : tok .String (), Line : s .tokenStartLine }, nil
175226 }
176227
@@ -179,6 +230,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
179230 tok .WriteString (r )
180231 lexState = inVar
181232 s .repeateSpecialChar = false
233+ s .nextTokenIsDirective = false
182234 continue
183235 }
184236
@@ -223,6 +275,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
223275 }
224276
225277 tok .WriteString (r )
278+ s .nextTokenIsDirective = true
226279 return Token {Text : tok .String (), Line : s .tokenStartLine }, nil
227280 }
228281
@@ -250,3 +303,51 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
250303 }
251304 }
252305}
306+
// ScannerExt is the interface that describes an extension for the [Scanner]. Scanner extensions enable scanning of
// configurations that contain syntaxes that do not follow the usual grammar.
type ScannerExt interface {
	// Tokenizer returns the Tokenizer that takes over scanning.
	// matchedToken is the directive token that triggered the extension.
	Tokenizer(s *SubScanner, matchedToken string) Tokenizer
}
312+
// TokenizerDone is returned by [Tokenizer] when tokenization is complete.
//
// NOTE(review): Go convention (staticcheck ST1012) names sentinel errors
// ErrXxx; renaming would break the exported API, so the name is kept.
// Callers should compare with errors.Is.
var TokenizerDone = errors.New("done")
315+
// Tokenizer is the interface that wraps the Next method.
//
// Next returns the next token scanned from the NGINX configuration or an error if the configuration cannot be
// tokenized. Return the special error, [TokenizerDone] when finished tokenizing.
type Tokenizer interface {
	Next() (Token, error)
}
323+
// LexerScanner is a compatibility layer between Lexers and Scanner.
type LexerScanner struct {
	lexer        Lexer          // the wrapped Lexer that produces NgxTokens
	scanner      *SubScanner    // source scanner handed over by the parent Scanner
	matchedToken string         // directive token that triggered this extension
	ch           <-chan NgxToken // lazily started by Next on first call; nil until then
}
331+
332+ func (s * LexerScanner ) Tokenizer (scanner * SubScanner , matchedtoken string ) Tokenizer {
333+ s .scanner = scanner
334+ s .matchedToken = matchedtoken
335+ return s
336+ }
337+
338+ func (s * LexerScanner ) Next () (Token , error ) {
339+ if s .ch == nil {
340+ s .ch = s .lexer .Lex (s .scanner , s .matchedToken )
341+ }
342+
343+ ngxTok , ok := <- s .ch
344+ if ! ok {
345+ return Token {}, TokenizerDone
346+ }
347+
348+ if ngxTok .Error != nil {
349+ return Token {}, newScannerErrf (ngxTok .Line , ngxTok .Error .Error ())
350+ }
351+
352+ return Token {Text : ngxTok .Value , Line : ngxTok .Line , IsQuoted : ngxTok .IsQuoted }, nil
353+ }
0 commit comments