11#include "tree_sitter/alloc.h"
2+ #include "tree_sitter/array.h"
23#include "tree_sitter/parser.h"
34#include <ctype.h>
45#include <wctype.h>
@@ -13,10 +14,12 @@ enum TokenType {
1314 END_OF_STATEMENT ,
1415 PREPROC_UNARY_OPERATOR ,
1516 HOLLERITH_CONSTANT ,
17+ MACRO_IDENTIFIER ,
1618};
1719
1820typedef struct {
1921 bool in_line_continuation ;
22+ Array (char * ) MacroIdentifiers ;
2023} Scanner ;
2124
2225typedef enum {
@@ -301,31 +304,44 @@ static bool scan_end_line_continuation(Scanner *scanner, TSLexer *lexer) {
301304 return true;
302305}
303306
304- static bool scan_string_literal_kind (TSLexer * lexer ) {
305- // Strictly, it's allowed for the kind to be an integer literal, in
306- // practice I've not seen it
307+ typedef Array (char ) String ;
308+
309+ // Returns NULL on error, otherwise an allocated char array for an identifier
310+ static String * scan_identifier (TSLexer * lexer ) {
307311 if (!iswalpha (lexer -> lookahead )) {
308- return false ;
312+ return NULL ;
309313 }
310-
311- lexer -> result_symbol = STRING_LITERAL_KIND ;
312-
313- // We need two characters of lookahead to see `_"`
314- char current_char = '\0' ;
315-
314+ String * possible_identifier = ts_calloc (1 , sizeof (String ));
316315 while (is_identifier_char (lexer -> lookahead ) && !lexer -> eof (lexer )) {
317- current_char = lexer -> lookahead ;
318- // Don't capture the trailing underscore as part of the kind identifier
319- if (lexer -> lookahead == '_' ) {
320- lexer -> mark_end (lexer );
321- }
322- advance (lexer );
316+ array_push (possible_identifier , lexer -> lookahead );
317+ // Don't capture the trailing underscore as part of the kind identifier
318+ // If another user of this function wants to mark the end again after
319+ // the identifier they're free to do so
320+ if (lexer -> lookahead == '_' ) {
321+ lexer -> mark_end (lexer );
322+ }
323+ advance (lexer );
324+ }
325+ if (possible_identifier -> size == 0 ) {
326+ array_delete (possible_identifier );
327+ ts_free (possible_identifier );
328+ return NULL ;
329+ }
330+ return possible_identifier ;
331+ }
332+
333+ static bool scan_string_literal_kind (TSLexer * lexer , String * identifier ) {
334+ if (identifier -> size == 0 ) {
335+ return false;
323336 }
324337
325- if ((current_char != '_' ) || (lexer -> lookahead != '"' && lexer -> lookahead != '\'' )) {
338+ char last_char = identifier -> contents [identifier -> size - 1 ];
339+ if ((last_char != '_' ) ||
340+ (lexer -> lookahead != '"' && lexer -> lookahead != '\'' )) {
326341 return false;
327342 }
328343
344+ lexer -> result_symbol = STRING_LITERAL_KIND ;
329345 return true;
330346}
331347
@@ -393,6 +409,28 @@ static bool scan_string_literal(TSLexer *lexer) {
393409 return false;
394410}
395411
412+ static bool scan_macro_identifier (Scanner * scanner , TSLexer * lexer ,
413+ String * identifier ) {
414+ unsigned num_macro_ids = scanner -> MacroIdentifiers .size ;
415+ if (num_macro_ids == 0 ) {
416+ return false;
417+ }
418+
419+ for (size_t i = 0 , end = scanner -> MacroIdentifiers .size ; i < end ; ++ i ) {
420+ char * macro_id = * array_get (& scanner -> MacroIdentifiers , i );
421+ unsigned macro_id_len = strlen (macro_id );
422+ if (identifier -> size != macro_id_len ) {
423+ continue ;
424+ }
425+ if (strncmp (macro_id , identifier -> contents , identifier -> size ) == 0 ) {
426+ lexer -> mark_end (lexer );
427+ lexer -> result_symbol = MACRO_IDENTIFIER ;
428+ return true;
429+ }
430+ }
431+ return false;
432+ }
433+
396434/// Need an external scanner to catch '!' before its parsed as a comment
397435static bool scan_preproc_unary_operator (TSLexer * lexer ) {
398436 const char next_char = lexer -> lookahead ;
@@ -467,19 +505,51 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
467505 return true;
468506 }
469507
470- if (valid_symbols [STRING_LITERAL_KIND ]) {
508+ // These symbols both scan for an identifier, we need to combine the logic
509+ // and they always need to be the last to look for since we can't backtrack
510+ if (valid_symbols [STRING_LITERAL_KIND ] || valid_symbols [MACRO_IDENTIFIER ]) {
511+ String * identifier = scan_identifier (lexer );
512+ bool identifier_result = false;
471513 // This may need a lot of lookahead, so should (probably) always
472514 // be the last token to look for
473- if (scan_string_literal_kind (lexer )) {
515+ if (identifier && valid_symbols [STRING_LITERAL_KIND ]) {
516+ if (scan_string_literal_kind (lexer , identifier )) {
517+ identifier_result = true;
518+ }
519+ }
520+ if (!identifier_result && identifier && valid_symbols [MACRO_IDENTIFIER ]) {
521+ if (scan_macro_identifier (scanner , lexer , identifier )) {
522+ identifier_result = true;
523+ }
524+ }
525+ if (identifier ) {
526+ array_delete (identifier );
527+ ts_free (identifier );
528+ }
529+ if (identifier_result ) {
474530 return true;
475531 }
476532 }
477-
478533 return false;
479534}
480535
481536void * tree_sitter_fortran_external_scanner_create () {
482- return ts_calloc (1 , sizeof (bool ));
537+ Scanner * result = (Scanner * )ts_calloc (1 , sizeof (Scanner ));
538+ char * macro_ids = getenv ("CODEE_TS_MACRO_IDS" );
539+ if (!macro_ids ) {
540+ return result ;
541+ }
542+ char * macro_id = strtok (macro_ids , ":" );
543+ Array (char * ) * macroIdsResult = & result -> MacroIdentifiers ;
544+ while (macro_id ) {
545+ int length = strlen (macro_id );
546+ char * new_str = (char * )ts_malloc ((length + 1 ) * sizeof (char ));
547+ strncpy (new_str , macro_id , length );
548+ array_push (macroIdsResult , new_str );
549+ // Keep splitting
550+ macro_id = strtok (NULL , ":" );
551+ }
552+ return result ;
483553}
484554
485555bool tree_sitter_fortran_external_scanner_scan (void * payload , TSLexer * lexer ,
@@ -491,20 +561,27 @@ bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
491561unsigned tree_sitter_fortran_external_scanner_serialize (void * payload ,
492562 char * buffer ) {
493563 Scanner * scanner = (Scanner * )payload ;
494- buffer [0 ] = (char )scanner -> in_line_continuation ;
495- return 1 ;
564+ unsigned size = sizeof (* scanner );
565+ memcpy (buffer , scanner , size );
566+ return size ;
496567}
497568
498569void tree_sitter_fortran_external_scanner_deserialize (void * payload ,
499570 const char * buffer ,
500571 unsigned length ) {
501572 Scanner * scanner = (Scanner * )payload ;
502573 if (length > 0 ) {
503- scanner -> in_line_continuation = buffer [0 ];
574+ unsigned size = sizeof (* scanner );
575+ memcpy (scanner , buffer , size );
504576 }
505577}
506578
507579void tree_sitter_fortran_external_scanner_destroy (void * payload ) {
508580 Scanner * scanner = (Scanner * )payload ;
581+ for (size_t i = 0 , end = scanner -> MacroIdentifiers .size ; i < end ; ++ i ) {
582+ char * str = * array_get (& scanner -> MacroIdentifiers , i );
583+ ts_free (str );
584+ }
585+ array_delete (& scanner -> MacroIdentifiers );
509586 ts_free (scanner );
510587}
0 commit comments