Skip to content

Commit 893a406

Browse files
committed
Allow to parse macro identifiers in variable decls
1 parent 18000b1 commit 893a406

File tree

2 files changed

+100
-24
lines changed

2 files changed

+100
-24
lines changed

grammar.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ module.exports = grammar({
6767
$._external_end_of_statement,
6868
$._preproc_unary_operator,
6969
$.hollerith_constant,
70+
$.macro_identifier,
7071
],
7172

7273
extras: $ => [
@@ -870,6 +871,7 @@ module.exports = grammar({
870871
$.derived_type,
871872
alias($.procedure_declaration, $.procedure),
872873
$.declared_type,
874+
$.macro_identifier,
873875
)),
874876
optional(seq(',',
875877
commaSep1(

src/scanner.c

Lines changed: 98 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "tree_sitter/alloc.h"
2+
#include "tree_sitter/array.h"
23
#include "tree_sitter/parser.h"
34
#include <ctype.h>
45
#include <wctype.h>
@@ -13,10 +14,12 @@ enum TokenType {
1314
END_OF_STATEMENT,
1415
PREPROC_UNARY_OPERATOR,
1516
HOLLERITH_CONSTANT,
17+
MACRO_IDENTIFIER,
1618
};
1719

1820
typedef struct {
1921
bool in_line_continuation;
22+
Array(char *) MacroIdentifiers;
2023
} Scanner;
2124

2225
typedef enum {
@@ -301,31 +304,43 @@ static bool scan_end_line_continuation(Scanner *scanner, TSLexer *lexer) {
301304
return true;
302305
}
303306

304-
static bool scan_string_literal_kind(TSLexer *lexer) {
305-
// Strictly, it's allowed for the kind to be an integer literal, in
306-
// practice I've not seen it
307+
typedef Array(char) String;
308+
309+
// Returns NULL on error, otherwise an allocated char array for an identifier
310+
static String *scan_identifier(TSLexer *lexer) {
307311
if (!iswalpha(lexer->lookahead)) {
308-
return false;
312+
return NULL;
309313
}
310-
311-
lexer->result_symbol = STRING_LITERAL_KIND;
312-
313-
// We need two characters of lookahead to see `_"`
314-
char current_char = '\0';
315-
314+
String *possible_identifier = ts_calloc(1, sizeof(String));
316315
while (is_identifier_char(lexer->lookahead) && !lexer->eof(lexer)) {
317-
current_char = lexer->lookahead;
318-
// Don't capture the trailing underscore as part of the kind identifier
319-
if (lexer->lookahead == '_') {
320-
lexer->mark_end(lexer);
321-
}
322-
advance(lexer);
316+
array_push(possible_identifier, lexer->lookahead);
317+
// Don't capture the trailing underscore as part of the kind identifier
318+
// If another user of this function wants to mark the end again after
319+
// the identifier they're free to do so
320+
if (lexer->lookahead == '_') {
321+
lexer->mark_end(lexer);
322+
}
323+
advance(lexer);
324+
}
325+
if (possible_identifier->size == 0) {
326+
ts_free(possible_identifier);
327+
return NULL;
328+
}
329+
return possible_identifier;
330+
}
331+
332+
// Decides whether an already-scanned identifier is the kind prefix of a
// string literal: its last character must be an underscore and the lexer's
// next character must be a single or double quote.
//
// On success sets the result symbol to STRING_LITERAL_KIND and returns true;
// otherwise returns false and leaves the result symbol untouched.
static bool scan_string_literal_kind(TSLexer *lexer, String *identifier) {
    if (identifier->size == 0) {
        return false;
    }

    const bool ends_with_underscore =
        identifier->contents[identifier->size - 1] == '_';
    const bool quote_follows =
        lexer->lookahead == '"' || lexer->lookahead == '\'';

    if (ends_with_underscore && quote_follows) {
        lexer->result_symbol = STRING_LITERAL_KIND;
        return true;
    }
    return false;
}
331346

@@ -393,6 +408,28 @@ static bool scan_string_literal(TSLexer *lexer) {
393408
return false;
394409
}
395410

411+
static bool scan_macro_identifier(Scanner *scanner, TSLexer *lexer,
412+
String *identifier) {
413+
unsigned num_macro_ids = scanner->MacroIdentifiers.size;
414+
if (num_macro_ids == 0) {
415+
return false;
416+
}
417+
418+
for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
419+
char *macro_id = *array_get(&scanner->MacroIdentifiers, i);
420+
unsigned macro_id_len = strlen(macro_id);
421+
if (identifier->size != macro_id_len) {
422+
continue;
423+
}
424+
if (strncmp(macro_id, identifier->contents, identifier->size) == 0) {
425+
lexer->mark_end(lexer);
426+
lexer->result_symbol = MACRO_IDENTIFIER;
427+
return true;
428+
}
429+
}
430+
return false;
431+
}
432+
396433
/// Need an external scanner to catch '!' before it's parsed as a comment
397434
static bool scan_preproc_unary_operator(TSLexer *lexer) {
398435
const char next_char = lexer->lookahead;
@@ -467,19 +504,50 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
467504
return true;
468505
}
469506

470-
if (valid_symbols[STRING_LITERAL_KIND]) {
507+
// These symbols both scan for an identifier, we need to combine the logic
508+
// and they always need to be the last to look for since we can't backtrack
509+
if (valid_symbols[STRING_LITERAL_KIND] || valid_symbols[MACRO_IDENTIFIER]) {
510+
String *identifier = scan_identifier(lexer);
511+
bool identifier_result = false;
471512
// This may need a lot of lookahead, so should (probably) always
472513
// be the last token to look for
473-
if (scan_string_literal_kind(lexer)) {
514+
if (identifier && valid_symbols[STRING_LITERAL_KIND]) {
515+
if (scan_string_literal_kind(lexer, identifier)) {
516+
identifier_result = true;
517+
}
518+
}
519+
if (!identifier_result && identifier && valid_symbols[MACRO_IDENTIFIER]) {
520+
if (scan_macro_identifier(scanner, lexer, identifier)) {
521+
identifier_result = true;
522+
}
523+
}
524+
if (identifier) {
525+
ts_free(identifier);
526+
}
527+
if (identifier_result) {
474528
return true;
475529
}
476530
}
477-
478531
return false;
479532
}
480533

481534
void *tree_sitter_fortran_external_scanner_create() {
482-
return ts_calloc(1, sizeof(bool));
535+
Scanner *result = (Scanner *)ts_calloc(1, sizeof(Scanner));
536+
char *macro_ids = getenv("CODEE_TS_MACRO_IDS");
537+
if (!macro_ids) {
538+
return result;
539+
}
540+
char *macro_id = strtok(macro_ids, ":");
541+
Array(char *) *macroIdsResult = &result->MacroIdentifiers;
542+
while (macro_id) {
543+
int length = strlen(macro_id);
544+
char *new_str = (char *)ts_malloc((length + 1) * sizeof(char));
545+
strncpy(new_str, macro_id, length);
546+
array_push(macroIdsResult, new_str);
547+
// Keep splitting
548+
macro_id = strtok(NULL, ":");
549+
}
550+
return result;
483551
}
484552

485553
bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
@@ -491,20 +559,26 @@ bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
491559
unsigned tree_sitter_fortran_external_scanner_serialize(void *payload,
492560
char *buffer) {
493561
Scanner *scanner = (Scanner *)payload;
494-
buffer[0] = (char)scanner->in_line_continuation;
495-
return 1;
562+
unsigned size = sizeof(*scanner);
563+
memcpy(buffer, scanner, size);
564+
return size;
496565
}
497566

498567
void tree_sitter_fortran_external_scanner_deserialize(void *payload,
499568
const char *buffer,
500569
unsigned length) {
501570
Scanner *scanner = (Scanner *)payload;
502571
if (length > 0) {
503-
scanner->in_line_continuation = buffer[0];
572+
unsigned size = sizeof(*scanner);
573+
memcpy(scanner, buffer, size);
504574
}
505575
}
506576

507577
void tree_sitter_fortran_external_scanner_destroy(void *payload) {
508578
Scanner *scanner = (Scanner *)payload;
579+
for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
580+
char *str = *array_get(&scanner->MacroIdentifiers, i);
581+
ts_free(str);
582+
}
509583
ts_free(scanner);
510584
}

0 commit comments

Comments
 (0)