@@ -8,6 +8,14 @@ import (
88 "strings"
99)
1010
11+ type scannerOptions struct {
12+ extensions map [string ]ScannerExt
13+ }
14+
15+ type ScannerOption interface {
16+ applyScannerOptions (options * scannerOptions )
17+ }
18+
1119// Token is a lexical token of the NGINX configuration syntax.
1220type Token struct {
1321 // Text is the string corresponding to the token. It could be a directive or symbol. The value is the actual token
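The ScannerOption interface is satisfied through an unexported method, so concrete options live inside this package. A minimal sketch of what one could look like (the extensionOption type is illustrative, not part of this diff): it registers a ScannerExt under the directive name that should trigger it, creating the extensions map on first use since NewScanner leaves it nil.

// extensionOption is a hypothetical in-package ScannerOption, shown for illustration only.
type extensionOption struct {
    directive string
    ext       ScannerExt
}

func (o extensionOption) applyScannerOptions(opts *scannerOptions) {
    if opts.extensions == nil {
        opts.extensions = make(map[string]ScannerExt)
    }
    // Scan consults this map whenever the token it just read starts a directive.
    opts.extensions[o.directive] = o.ext
}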
@@ -20,6 +28,8 @@ type Token struct {
     IsQuoted bool
 }
 
+func (t Token) String() string { return fmt.Sprintf("{%d, %s, %t}", t.Line, t.Text, t.IsQuoted) }
+
 type scannerError struct {
     msg  string
     line int
@@ -52,23 +62,33 @@ func LineNumber(err error) (int, bool) {
 //
 // Use NewScanner to construct a Scanner.
 type Scanner struct {
-    scanner            *bufio.Scanner
-    lineno             int
-    tokenStartLine     int
-    tokenDepth         int
-    repeateSpecialChar bool // only '}' can be repeated
-    prev               string
-    err                error
+    scanner              *bufio.Scanner
+    lineno               int
+    tokenStartLine       int
+    tokenDepth           int
+    repeateSpecialChar   bool // only '}' can be repeated
+    nextTokenIsDirective bool
+    prev                 string
+    err                  error
+    options              *scannerOptions
+    ext                  Tokenizer
 }
 
 // NewScanner returns a new Scanner to read from r.
-func NewScanner(r io.Reader) *Scanner {
+func NewScanner(r io.Reader, options ...ScannerOption) *Scanner {
+    opts := &scannerOptions{}
+    for _, opt := range options {
+        opt.applyScannerOptions(opts)
+    }
+
     s := &Scanner{
-        scanner:            bufio.NewScanner(r),
-        lineno:             1,
-        tokenStartLine:     1,
-        tokenDepth:         0,
-        repeateSpecialChar: false,
+        scanner:              bufio.NewScanner(r),
+        lineno:               1,
+        tokenStartLine:       1,
+        tokenDepth:           0,
+        repeateSpecialChar:   false,
+        nextTokenIsDirective: true,
+        options:              opts,
     }
 
     s.scanner.Split(bufio.ScanRunes)
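Because the new parameter is variadic, existing NewScanner call sites keep compiling unchanged. A small usage sketch, assuming it sits in the same package as Scanner (the scanAll helper is hypothetical, shown only to illustrate the documented Scan/io.EOF contract):

// scanAll is a hypothetical helper that drains a Scanner into a slice of tokens.
// It relies on Scan returning io.EOF once the source is exhausted.
func scanAll(r io.Reader, options ...ScannerOption) ([]Token, error) {
    s := NewScanner(r, options...)
    var tokens []Token
    for {
        tok, err := s.Scan()
        if errors.Is(err, io.EOF) {
            return tokens, nil
        }
        if err != nil {
            return nil, err
        }
        tokens = append(tokens, tok)
    }
}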
@@ -92,7 +112,21 @@ func (s *Scanner) setErr(err error) {
 
 // Scan reads the next token from source and returns it. It returns io.EOF at the end of the source. Scanner errors are
 // returned when encountered.
-func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
+func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo, maintidx // sorry
+    if s.ext != nil {
+        t, err := s.ext.Next()
+        if err != nil {
+            if !errors.Is(err, ErrTokenizerDone) {
+                s.setErr(err)
+                return Token{}, s.err
+            }
+
+            s.ext = nil
+        } else {
+            return t, nil
+        }
+    }
+
     var tok strings.Builder
 
     lexState := skipSpace
@@ -129,6 +163,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
             r = nextRune
             if isEOL(r) {
                 s.lineno++
+                s.nextTokenIsDirective = true
             }
         default:
             readNext = true
@@ -149,6 +184,16 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
             r = "\\" + r
         }
 
+        if tok.Len() > 0 {
+            t := tok.String()
+            if s.nextTokenIsDirective {
+                if ext, ok := s.options.extensions[t]; ok {
+                    s.ext = ext.Tokenizer(&SubScanner{parent: s, tokenLine: s.tokenStartLine}, t)
+                    return Token{Text: t, Line: s.tokenStartLine}, nil
+                }
+            }
+        }
+
         switch lexState {
         case skipSpace:
             if !isSpace(r) {
@@ -166,11 +211,13 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
                     tok.WriteString(r)
                     lexState = inComment
                     s.tokenStartLine = s.lineno
+                    s.nextTokenIsDirective = false
                     continue
                 }
             }
 
             if isSpace(r) {
+                s.nextTokenIsDirective = false
                 return Token{Text: tok.String(), Line: s.tokenStartLine}, nil
             }
 
@@ -179,6 +226,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
                 tok.WriteString(r)
                 lexState = inVar
                 s.repeateSpecialChar = false
+                s.nextTokenIsDirective = false
                 continue
             }
 
@@ -223,6 +271,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
             }
 
             tok.WriteString(r)
+            s.nextTokenIsDirective = true
             return Token{Text: tok.String(), Line: s.tokenStartLine}, nil
         }
 
@@ -250,3 +299,51 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
         }
     }
 }
+
+// ScannerExt is the interface that describes an extension for the [Scanner]. Scanner extensions enable scanning of
+// configurations that contain syntaxes that do not follow the usual grammar.
+type ScannerExt interface {
+    Tokenizer(s *SubScanner, matchedToken string) Tokenizer
+}
+
+// ErrTokenizerDone is returned by [Tokenizer] when tokenization is complete.
+var ErrTokenizerDone = errors.New("done")
+
+// Tokenizer is the interface that wraps the Next method.
+//
+// Next returns the next token scanned from the NGINX configuration or an error if the configuration cannot be
+// tokenized. Return the special error [ErrTokenizerDone] when finished tokenizing.
+type Tokenizer interface {
+    Next() (Token, error)
+}
+
+// LexerScanner is a compatibility layer between Lexers and Scanner.
+type LexerScanner struct {
+    lexer        Lexer
+    scanner      *SubScanner
+    matchedToken string
+    ch           <-chan NgxToken
+}
+
+func (s *LexerScanner) Tokenizer(scanner *SubScanner, matchedtoken string) Tokenizer {
+    s.scanner = scanner
+    s.matchedToken = matchedtoken
+    return s
+}
+
+func (s *LexerScanner) Next() (Token, error) {
+    if s.ch == nil {
+        s.ch = s.lexer.Lex(s.scanner, s.matchedToken)
+    }
+
+    ngxTok, ok := <-s.ch
+    if !ok {
+        return Token{}, ErrTokenizerDone
+    }
+
+    if ngxTok.Error != nil {
+        return Token{}, newScannerErrf(ngxTok.Line, ngxTok.Error.Error())
+    }
+
+    return Token{Text: ngxTok.Value, Line: ngxTok.Line, IsQuoted: ngxTok.IsQuoted}, nil
+}
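Implementing the extension point directly, rather than adapting an existing Lexer through LexerScanner, only requires the two small interfaces above. A minimal sketch, with all names below being illustrative rather than part of this diff: the ScannerExt receives the matched directive and a SubScanner, and its Tokenizer emits tokens until it signals completion with ErrTokenizerDone, at which point Scan resumes normal lexing.

// fixedTokenExt is a hypothetical ScannerExt that emits a single synthetic token
// for the directive that triggered it, then yields control back to the Scanner.
type fixedTokenExt struct{}

func (fixedTokenExt) Tokenizer(s *SubScanner, matchedToken string) Tokenizer {
    // The SubScanner could be used to read the directive's raw payload; this
    // sketch ignores it and only remembers which directive was matched.
    return &fixedTokenizer{directive: matchedToken}
}

type fixedTokenizer struct {
    directive string
    done      bool
}

func (t *fixedTokenizer) Next() (Token, error) {
    if t.done {
        return Token{}, ErrTokenizerDone // hand control back to the Scanner
    }
    t.done = true
    return Token{Text: "payload-for-" + t.directive}, nil
}

LexerScanner plays the same role for the channel-based Lexer interface: it starts the Lexer on first use and drains its NgxToken channel until the channel is closed, which it reports as ErrTokenizerDone.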