11#include "tree_sitter/alloc.h"
2+ #include "tree_sitter/array.h"
23#include "tree_sitter/parser.h"
34#include <ctype.h>
45#include <wctype.h>
@@ -13,10 +14,12 @@ enum TokenType {
1314 END_OF_STATEMENT ,
1415 PREPROC_UNARY_OPERATOR ,
1516 HOLLERITH_CONSTANT ,
17+ MACRO_IDENTIFIER ,
1618};
1719
1820typedef struct {
1921 bool in_line_continuation ;
22+ Array (char * ) MacroIdentifiers ;
2023} Scanner ;
2124
2225typedef enum {
@@ -301,31 +304,43 @@ static bool scan_end_line_continuation(Scanner *scanner, TSLexer *lexer) {
301304 return true;
302305}
303306
304- static bool scan_string_literal_kind (TSLexer * lexer ) {
305- // Strictly, it's allowed for the kind to be an integer literal, in
306- // practice I've not seen it
307+ typedef Array (char ) String ;
308+
309+ // Returns NULL on error, otherwise an allocated char array for an identifier
310+ static String * scan_identifier (TSLexer * lexer ) {
307311 if (!iswalpha (lexer -> lookahead )) {
308- return false ;
312+ return NULL ;
309313 }
310-
311- lexer -> result_symbol = STRING_LITERAL_KIND ;
312-
313- // We need two characters of lookahead to see `_"`
314- char current_char = '\0' ;
315-
314+ String * possible_identifier = ts_calloc (1 , sizeof (String ));
316315 while (is_identifier_char (lexer -> lookahead ) && !lexer -> eof (lexer )) {
317- current_char = lexer -> lookahead ;
318- // Don't capture the trailing underscore as part of the kind identifier
319- if (lexer -> lookahead == '_' ) {
320- lexer -> mark_end (lexer );
321- }
322- advance (lexer );
316+ array_push (possible_identifier , lexer -> lookahead );
317+ // Don't capture the trailing underscore as part of the kind identifier
318+ // If another user of this function wants to mark the end again after
319+ // the identifier they're free to do so
320+ if (lexer -> lookahead == '_' ) {
321+ lexer -> mark_end (lexer );
322+ }
323+ advance (lexer );
324+ }
325+ if (possible_identifier -> size == 0 ) {
326+ ts_free (possible_identifier );
327+ return NULL ;
328+ }
329+ return possible_identifier ;
330+ }
331+
332+ static bool scan_string_literal_kind (TSLexer * lexer , String * identifier ) {
333+ if (identifier -> size == 0 ) {
334+ return false;
323335 }
324336
325- if ((current_char != '_' ) || (lexer -> lookahead != '"' && lexer -> lookahead != '\'' )) {
337+ char last_char = identifier -> contents [identifier -> size - 1 ];
338+ if ((last_char != '_' ) ||
339+ (lexer -> lookahead != '"' && lexer -> lookahead != '\'' )) {
326340 return false;
327341 }
328342
343+ lexer -> result_symbol = STRING_LITERAL_KIND ;
329344 return true;
330345}
331346
@@ -393,6 +408,28 @@ static bool scan_string_literal(TSLexer *lexer) {
393408 return false;
394409}
395410
411+ static bool scan_macro_identifier (Scanner * scanner , TSLexer * lexer ,
412+ String * identifier ) {
413+ unsigned num_macro_ids = scanner -> MacroIdentifiers .size ;
414+ if (num_macro_ids == 0 ) {
415+ return false;
416+ }
417+
418+ for (size_t i = 0 , end = scanner -> MacroIdentifiers .size ; i < end ; ++ i ) {
419+ char * macro_id = * array_get (& scanner -> MacroIdentifiers , i );
420+ unsigned macro_id_len = strlen (macro_id );
421+ if (identifier -> size != macro_id_len ) {
422+ continue ;
423+ }
424+ if (strncmp (macro_id , identifier -> contents , identifier -> size ) == 0 ) {
425+ lexer -> mark_end (lexer );
426+ lexer -> result_symbol = MACRO_IDENTIFIER ;
427+ return true;
428+ }
429+ }
430+ return false;
431+ }
432+
396433/// Need an external scanner to catch '!' before it's parsed as a comment
397434static bool scan_preproc_unary_operator (TSLexer * lexer ) {
398435 const char next_char = lexer -> lookahead ;
@@ -467,19 +504,50 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
467504 return true;
468505 }
469506
470- if (valid_symbols [STRING_LITERAL_KIND ]) {
507+ // These symbols both scan for an identifier, we need to combine the logic
508+ // and they always need to be the last to look for since we can't backtrack
509+ if (valid_symbols [STRING_LITERAL_KIND ] || valid_symbols [MACRO_IDENTIFIER ]) {
510+ String * identifier = scan_identifier (lexer );
511+ bool identifier_result = false;
471512 // This may need a lot of lookahead, so should (probably) always
472513 // be the last token to look for
473- if (scan_string_literal_kind (lexer )) {
514+ if (identifier && valid_symbols [STRING_LITERAL_KIND ]) {
515+ if (scan_string_literal_kind (lexer , identifier )) {
516+ identifier_result = true;
517+ }
518+ }
519+ if (!identifier_result && identifier && valid_symbols [MACRO_IDENTIFIER ]) {
520+ if (scan_macro_identifier (scanner , lexer , identifier )) {
521+ identifier_result = true;
522+ }
523+ }
524+ if (identifier ) {
525+ ts_free (identifier );
526+ }
527+ if (identifier_result ) {
474528 return true;
475529 }
476530 }
477-
478531 return false;
479532}
480533
481534void * tree_sitter_fortran_external_scanner_create () {
482- return ts_calloc (1 , sizeof (bool ));
535+ Scanner * result = (Scanner * )ts_calloc (1 , sizeof (Scanner ));
536+ char * macro_ids = getenv ("CODEE_TS_MACRO_IDS" );
537+ if (!macro_ids ) {
538+ return result ;
539+ }
540+ char * macro_id = strtok (macro_ids , ":" );
541+ Array (char * ) * macroIdsResult = & result -> MacroIdentifiers ;
542+ while (macro_id ) {
543+ int length = strlen (macro_id );
544+ char * new_str = (char * )ts_malloc ((length + 1 ) * sizeof (char ));
545+ strncpy (new_str , macro_id , length );
546+ array_push (macroIdsResult , new_str );
547+ // Keep splitting
548+ macro_id = strtok (NULL , ":" );
549+ }
550+ return result ;
483551}
484552
485553bool tree_sitter_fortran_external_scanner_scan (void * payload , TSLexer * lexer ,
@@ -491,20 +559,26 @@ bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
491559unsigned tree_sitter_fortran_external_scanner_serialize (void * payload ,
492560 char * buffer ) {
493561 Scanner * scanner = (Scanner * )payload ;
494- buffer [0 ] = (char )scanner -> in_line_continuation ;
495- return 1 ;
562+ unsigned size = sizeof (* scanner );
563+ memcpy (buffer , scanner , size );
564+ return size ;
496565}
497566
498567void tree_sitter_fortran_external_scanner_deserialize (void * payload ,
499568 const char * buffer ,
500569 unsigned length ) {
501570 Scanner * scanner = (Scanner * )payload ;
502571 if (length > 0 ) {
503- scanner -> in_line_continuation = buffer [0 ];
572+ unsigned size = sizeof (* scanner );
573+ memcpy (scanner , buffer , size );
504574 }
505575}
506576
507577void tree_sitter_fortran_external_scanner_destroy (void * payload ) {
508578 Scanner * scanner = (Scanner * )payload ;
579+ for (size_t i = 0 , end = scanner -> MacroIdentifiers .size ; i < end ; ++ i ) {
580+ char * str = * array_get (& scanner -> MacroIdentifiers , i );
581+ ts_free (str );
582+ }
509583 ts_free (scanner );
510584}
0 commit comments