Skip to content

Commit b8a3d94

Browse files
committed
Allow to parse macro identifiers in variable decls
1 parent 18000b1 commit b8a3d94

File tree

2 files changed

+119
-24
lines changed

2 files changed

+119
-24
lines changed

grammar.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ module.exports = grammar({
6767
$._external_end_of_statement,
6868
$._preproc_unary_operator,
6969
$.hollerith_constant,
70+
$.macro_identifier,
7071
],
7172

7273
extras: $ => [
@@ -870,6 +871,7 @@ module.exports = grammar({
870871
$.derived_type,
871872
alias($.procedure_declaration, $.procedure),
872873
$.declared_type,
874+
$.macro_identifier,
873875
)),
874876
optional(seq(',',
875877
commaSep1(

src/scanner.c

Lines changed: 117 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "tree_sitter/alloc.h"
2+
#include "tree_sitter/array.h"
23
#include "tree_sitter/parser.h"
34
#include <ctype.h>
45
#include <wctype.h>
@@ -13,10 +14,14 @@ enum TokenType {
1314
END_OF_STATEMENT,
1415
PREPROC_UNARY_OPERATOR,
1516
HOLLERITH_CONSTANT,
17+
MACRO_IDENTIFIER,
1618
};
1719

20+
typedef Array(char *) StringArray;
21+
1822
// State carried by one external scanner instance.
typedef struct {
1923
// Flag that is saved to and restored from the serialization buffer by the
// serialize/deserialize callbacks. Presumably true while the scanner is in
// the middle of a line continuation -- confirm against the continuation
// scanning functions.
bool in_line_continuation;
24+
// Macro names that may be lexed as MACRO_IDENTIFIER. Filled once in
// tree_sitter_fortran_external_scanner_create() from the colon-separated
// CODEE_TS_MACRO_IDS environment variable; every element is a separately
// heap-allocated, NUL-terminated string that is released in
// tree_sitter_fortran_external_scanner_destroy().
StringArray MacroIdentifiers;
2025
} Scanner;
2126

2227
typedef enum {
@@ -301,31 +306,46 @@ static bool scan_end_line_continuation(Scanner *scanner, TSLexer *lexer) {
301306
return true;
302307
}
303308

304-
static bool scan_string_literal_kind(TSLexer *lexer) {
305-
// Strictly, it's allowed for the kind to be an integer literal, in
306-
// practice I've not seen it
309+
typedef Array(char) String;
310+
311+
// Returns NULL on error, otherwise an allocated char array for an identifier
312+
static String *scan_identifier(TSLexer *lexer) {
    // Contract: consumes a run of identifier characters and hands them back
    // as a heap-allocated String, or returns NULL when the lookahead is not
    // alphabetic. The caller owns the result and must release it with
    // array_delete() followed by ts_free().
    if (!iswalpha(lexer->lookahead)) {
        return NULL;
    }

    String *collected = ts_calloc(1, sizeof(String));
    for (;;) {
        if (!is_identifier_char(lexer->lookahead) || lexer->eof(lexer)) {
            break;
        }
        array_push(collected, lexer->lookahead);
        // The token end is marked *before* every underscore, so a trailing
        // underscore is left out of the token (needed for string literal
        // kinds). Callers that want the whole identifier mark the end again
        // once this function returns.
        if (lexer->lookahead == '_') {
            lexer->mark_end(lexer);
        }
        advance(lexer);
    }

    if (collected->size > 0) {
        return collected;
    }

    // Nothing was collected: release the array storage first, then the
    // heap-allocated array header itself.
    array_delete(collected);
    ts_free(collected);
    return NULL;
}
336+
337+
static bool scan_string_literal_kind(TSLexer *lexer, String *identifier) {
    // An already-scanned identifier is a string literal kind prefix only when
    // it ends in '_' and the very next character opens a string literal.
    // On success the result symbol is set to STRING_LITERAL_KIND; this
    // function does not touch the token end itself.
    if (identifier->size == 0) {
        return false;
    }

    const bool ends_with_underscore =
        identifier->contents[identifier->size - 1] == '_';
    const bool quote_follows =
        lexer->lookahead == '"' || lexer->lookahead == '\'';

    if (!(ends_with_underscore && quote_follows)) {
        return false;
    }

    lexer->result_symbol = STRING_LITERAL_KIND;
    return true;
}
331351

@@ -393,6 +413,33 @@ static bool scan_string_literal(TSLexer *lexer) {
393413
return false;
394414
}
395415

416+
// Scans, using the MacroIdentifiers list from the scanner state, an identifier
417+
// that is contained in that list
418+
static bool scan_macro_identifier(Scanner *scanner, TSLexer *lexer,
419+
String *identifier) {
420+
unsigned num_macro_ids = scanner->MacroIdentifiers.size;
421+
// Nothing to compare against
422+
if (num_macro_ids == 0) {
423+
return false;
424+
}
425+
426+
// Find an equal macro identifier
427+
for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
428+
char *macro_id = *array_get(&scanner->MacroIdentifiers, i);
429+
unsigned macro_id_len = strlen(macro_id);
430+
// This will never be equal
431+
if (identifier->size != macro_id_len) {
432+
continue;
433+
}
434+
if (strncmp(macro_id, identifier->contents, identifier->size) == 0) {
435+
lexer->mark_end(lexer);
436+
lexer->result_symbol = MACRO_IDENTIFIER;
437+
return true;
438+
}
439+
}
440+
return false;
441+
}
442+
396443
/// Need an external scanner to catch '!' before its parsed as a comment
397444
static bool scan_preproc_unary_operator(TSLexer *lexer) {
398445
const char next_char = lexer->lookahead;
@@ -467,19 +514,57 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
467514
return true;
468515
}
469516

470-
if (valid_symbols[STRING_LITERAL_KIND]) {
517+
// Both of these symbols start by scanning an identifier, so their logic is
518+
// combined here. They must be the last tokens tried because we can't backtrack.
519+
if (valid_symbols[STRING_LITERAL_KIND] || valid_symbols[MACRO_IDENTIFIER]) {
520+
String *identifier = scan_identifier(lexer);
521+
bool identifier_result = false;
471522
// This may need a lot of lookahead, so should (probably) always
472523
// be the last token to look for
473-
if (scan_string_literal_kind(lexer)) {
524+
if (identifier && valid_symbols[STRING_LITERAL_KIND]) {
525+
if (scan_string_literal_kind(lexer, identifier)) {
526+
identifier_result = true;
527+
}
528+
}
529+
if (!identifier_result && identifier && valid_symbols[MACRO_IDENTIFIER]) {
530+
if (scan_macro_identifier(scanner, lexer, identifier)) {
531+
identifier_result = true;
532+
}
533+
}
534+
if (identifier) {
535+
// First deallocate the array content itself and then the heap-allocated
536+
// array struct
537+
array_delete(identifier);
538+
ts_free(identifier);
539+
}
540+
if (identifier_result) {
474541
return true;
475542
}
476543
}
477-
478544
return false;
479545
}
480546

481547
void *tree_sitter_fortran_external_scanner_create() {
482-
return ts_calloc(1, sizeof(bool));
548+
Scanner *result = (Scanner *)ts_calloc(1, sizeof(Scanner));
549+
// First get the colon separated list of macro IDs from the environment
550+
char *macro_ids = getenv("CODEE_TS_MACRO_IDS");
551+
if (!macro_ids) {
552+
return result;
553+
}
554+
// Now separate them while we copy them to a list in the scanner state
555+
StringArray *macroIdsResult = &result->MacroIdentifiers;
556+
char *macro_id = strtok(macro_ids, ":");
557+
while (macro_id) {
558+
// strlen is safe with strtok's result
559+
int length = strlen(macro_id);
560+
// length + 1 for the null termination
561+
char *new_str = (char *)ts_calloc(1, (length + 1) * sizeof(char));
562+
strncpy(new_str, macro_id, length);
563+
array_push(macroIdsResult, new_str);
564+
// Keep splitting
565+
macro_id = strtok(NULL, ":");
566+
}
567+
return result;
483568
}
484569

485570
bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
@@ -491,20 +576,28 @@ bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
491576
unsigned tree_sitter_fortran_external_scanner_serialize(void *payload,
492577
char *buffer) {
493578
Scanner *scanner = (Scanner *)payload;
494-
buffer[0] = (char)scanner->in_line_continuation;
495-
return 1;
579+
unsigned size = sizeof(*scanner);
580+
memcpy(buffer, scanner, size);
581+
return size;
496582
}
497583

498584
void tree_sitter_fortran_external_scanner_deserialize(void *payload,
499585
const char *buffer,
500586
unsigned length) {
501587
Scanner *scanner = (Scanner *)payload;
502588
if (length > 0) {
503-
scanner->in_line_continuation = buffer[0];
589+
unsigned size = sizeof(*scanner);
590+
memcpy(scanner, buffer, size);
504591
}
505592
}
506593

507594
void tree_sitter_fortran_external_scanner_destroy(void *payload) {
508595
Scanner *scanner = (Scanner *)payload;
596+
// Destroy the strings allocated in each array element
597+
for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
598+
char *str = *array_get(&scanner->MacroIdentifiers, i);
599+
ts_free(str);
600+
}
601+
array_delete(&scanner->MacroIdentifiers);
509602
ts_free(scanner);
510603
}

0 commit comments

Comments
 (0)