--- /dev/null
+#+TITLE: lex.cc Detailed Structure and Function Index
+#+Author: Caelus, code formalist (GPT-4, OpenAI), Thomas
+#+Date:2025-05-09
+
+* Data Structures Found in Non-Static Function Signatures
+** struct context
+Used in lexer or normalization stages to track state during token reclassification or Unicode normalization.
+
+** enum cpp_token_fld_kind
+Enumeration describing the internal storage kind for a preprocessor token's value — distinguishes between identifiers, numbers, etc.
+
+** enum cpp_ttype
+Enumeration of token types recognized by the preprocessor (e.g., identifiers, punctuators, literals, etc.).
+
+** struct lit_accum
+Helper structure that accumulates string or character literal fragments during lexing.
+
+** struct normalize_state
+Tracks intermediate state during Unicode normalization of identifiers or literals.
+
+** struct token_spelling
+Structure used to store or compute the textual spelling of a token, including alternate representations (e.g., digraphs).
+* Data Structures Shared Among Functions in lex.cc
+** _cpp_buff
+Used in: _cpp_aligned_alloc, _cpp_extend_buff, _cpp_free_buff, _cpp_get_buff, _cpp_release_buff, free, is_macro, new_buff, usage
+Temporary token buffer used during macro argument collection and expansion. Shared to manage input buffering across stages.
+
+** context
+Used in: _cpp_remaining_tokens_num_in_context, character, if, maybe_warn_bidi_on_close, on_char, rich_loc
+State struct used in bidirectional text normalization and context-aware lexing. Functions reference it to apply UCN and bidi safety rules.
+
+** cpp_hashnode
+Used in: cpp_error, if, is_macro, lex_identifier, lex_identifier_intern, line, linemap_included_from
+Represents identifiers and macro definitions. Shared among symbol lookup, macro parsing, and token classification functions.
+
+** cpp_token
+Used in: RESULT, _cpp_temp_token, cpp_directive_only_process, cpp_output_line_to_string, if, line, linemap_included_from, own, return
+Token structure used to represent lexed entities passed between scanners, macro collectors, and diagnostic routines.
+
+** cpp_ttype
+Used in: is_macro, lex_string, own, return
+Enumeration of token types (e.g., identifiers, keywords, operators). Shared by scanners and type-check logic to interpret input.
+* Non-Static Functions
+** _cpp_aligned_alloc
+- Signature: `unsigned char * _cpp_aligned_alloc (...)`
+- Purpose: Allocates a buffer with alignment suitable for vectorized scanning operations (e.g., SSE, AVX).
+
+** _cpp_append_extend_buff
+- Signature: `_cpp_buff * _cpp_append_extend_buff (...)`
+- Purpose: Appends additional space to an existing token buffer, used when macro expansions exceed initial estimates.
+
+** _cpp_clean_line
+- Signature: `void _cpp_clean_line (...)`
+- Purpose: Cleans lexer line state after processing a complete logical line.
+
+** _cpp_commit_buff
+- Signature: `void * _cpp_commit_buff (...)`
+- Purpose: Finalizes a temporary token buffer and returns a stable pointer to the committed data.
+
+** _cpp_equiv_tokens
+- Signature: `int _cpp_equiv_tokens (...)`
+- Purpose: Determines whether two tokens are equivalent, ignoring cosmetic differences such as spacing.
+
+** _cpp_extend_buff
+- Signature: `void _cpp_extend_buff (...)`
+- Purpose: Increases the capacity of a token buffer to accommodate additional tokens during macro processing.
+
+** _cpp_free_buff
+- Signature: `void _cpp_free_buff (...)`
+- Purpose: Releases memory allocated for a temporary or committed token buffer.
+
+** _cpp_get_buff
+- Signature: `_cpp_buff * _cpp_get_buff (...)`
+- Purpose: Returns a new or recycled token buffer from the internal pool, minimizing allocations.
+
+** _cpp_get_fresh_line
+- Signature: `bool _cpp_get_fresh_line (...)`
+- Purpose: Consumes input until a logical line is ready. Handles escaped newlines.
+
+** _cpp_init_lexer
+- Signature: `void _cpp_init_lexer (...)`
+- Purpose: Initializes the core lexer state: buffers, token rings, and diagnostic counters.
+
+** _cpp_init_tokenrun
+- Signature: `void _cpp_init_tokenrun (...)`
+- Purpose: Initializes a ring buffer or region for holding tokens during lexing.
+
+** _cpp_lex_direct
+- Signature: `cpp_token * _cpp_lex_direct (...)`
+- Purpose: Lexes a single token from the input without macro expansion — used for directive parsing.
+
+** _cpp_lex_identifier
+- Signature: `cpp_hashnode * _cpp_lex_identifier (...)`
+- Purpose: Lexes an identifier and returns a hashnode for it, performing UCN expansion and keyword recognition.
+
+** _cpp_lex_token
+- Signature: `const cpp_token * _cpp_lex_token (...)`
+- Purpose: Lexes the next token from the input stream, handling macro expansion and buffering.
+
+** _cpp_process_line_notes
+- Signature: `void _cpp_process_line_notes (...)`
+- Purpose: Handles mapping #line notes and diagnostic position metadata.
+
+** _cpp_release_buff
+- Signature: `void _cpp_release_buff (...)`
+- Purpose: Returns a previously used token buffer back to the internal pool for reuse.
+
+** _cpp_remaining_tokens_num_in_context
+- Signature: `int _cpp_remaining_tokens_num_in_context (...)`
+- Purpose: Returns how many tokens are left within the current lexing context.
+
+** _cpp_skip_block_comment
+- Signature: `bool _cpp_skip_block_comment (...)`
+- Purpose: Skips over block comments, optionally returning whether line state changed.
+
+** _cpp_spell_ident_ucns
+- Signature: `unsigned char * _cpp_spell_ident_ucns (...)`
+- Purpose: Generates a UTF-8 spelling for identifiers that contain Universal Character Names (UCNs).
+
+** _cpp_temp_token
+- Signature: `cpp_token * _cpp_temp_token (...)`
+- Purpose: Allocates space for a temporary token during parsing or lookahead.
+
+** _cpp_unaligned_alloc
+- Signature: `unsigned char * _cpp_unaligned_alloc (...)`
+- Purpose: Allocates unaligned memory for fallback lexers or comment scanning buffers.
+
+** cpp_alloc_token_string
+- Signature: `const uchar * cpp_alloc_token_string (...)`
+- Purpose: Allocates a fresh string buffer for a token's textual content, typically used in output or diagnostics.
+
+** cpp_avoid_paste
+- Signature: `int cpp_avoid_paste (...)`
+- Purpose: Determines whether a space is needed between two tokens to avoid unintended pasting.
+
+** cpp_force_token_locations
+- Signature: `void cpp_force_token_locations (...)`
+- Purpose: Forces the preprocessor to track source locations for all tokens, overriding lazy behavior.
+
+** cpp_get_comments
+- Signature: `cpp_comment_table * cpp_get_comments (...)`
+- Purpose: Returns a pointer to the internal comment table used for diagnostics or pretty-printing.
+
+** cpp_ideq
+- Signature: `int cpp_ideq (...)`
+- Purpose: Compares two identifiers for equality in a normalized preprocessor sense.
+
+** cpp_output_line
+- Signature: `void cpp_output_line (...)`
+- Purpose: Outputs an entire preprocessor line, including comments or tokens, to a file.
+
+** cpp_output_line_to_string
+- Signature: `unsigned char * cpp_output_line_to_string (...)`
+- Purpose: Generates a string representation of a preprocessed line for diagnostics.
+
+** cpp_output_token
+- Signature: `void cpp_output_token (...)`
+- Purpose: Writes a token to an output stream, respecting spacing and formatting rules.
+
+** cpp_peek_token
+- Signature: `const cpp_token * cpp_peek_token (...)`
+- Purpose: Returns a pointer to the next token without consuming it. Used in lookahead.
+
+** cpp_spell_token
+- Signature: `unsigned char * cpp_spell_token (...)`
+- Purpose: Computes or reconstructs the text spelling of a token from internal data.
+
+** cpp_stop_forcing_token_locations
+- Signature: `void cpp_stop_forcing_token_locations (...)`
+- Purpose: Stops forcibly tracking token locations, restoring default behavior.
+
+** cpp_token_as_text
+- Signature: `unsigned char * cpp_token_as_text (...)`
+- Purpose: Converts a token into its textual representation (used for macro debug output or trace logs).
+
+** cpp_token_len
+- Signature: `unsigned int cpp_token_len (...)`
+- Purpose: Computes the length of a token for buffer management or output purposes.
+
+** cpp_token_val_index
+- Signature: `enum cpp_token_fld_kind cpp_token_val_index (...)`
+- Purpose: Returns the kind of value stored in the token (e.g., string, identifier, number).
+
+** cpp_type2name
+- Signature: `const char * cpp_type2name (...)`
+- Purpose: Maps internal token types (e.g., CPP_NUMBER) to human-readable strings like "number".
+
+** current_ctx
+- Signature: `kind current_ctx (...)`
+- Purpose: Returns the current Unicode bidirectional context (e.g., LTR, RTL) used during lexing.
+
+** current_ctx_loc
+- Signature: `location_t current_ctx_loc (...)`
+- Purpose: Returns the source location associated with the current bidi context — for diagnostics.
+
+** current_ctx_ucn_p
+- Signature: `bool current_ctx_ucn_p (...)`
+- Purpose: Returns whether the current Unicode context allows Universal Character Names (UCNs).
+
+** init_vectorized_lexer
+- Signature: `static inline void init_vectorized_lexer (...)`
+- Purpose: Initializes vectorized scanning function pointers depending on CPU features.
+
+** on_char
+- Signature: `void on_char (...)`
+- Purpose: Handles logic when a character is encountered that might affect bidirectional or normalization context.
+
+** on_close
+- Signature: `void on_close (...)`
+- Purpose: Called when a bidirectional context-closing token (e.g., PDF) is encountered.
+
+** pop
+- Signature: `void pop (...)`
+- Purpose: Pops the current normalization or bidi context off the internal context stack.
+
+** pop_kind_at
+- Signature: `kind pop_kind_at (...)`
+- Purpose: Returns the kind of context that would be popped at a given depth (used for lookahead).
+
+** read_char
+- Signature: `char read_char (...)`
+- Purpose: Reads a character from the input buffer, optionally applying normalization or escaping rules.
+
+** search_line_fast
+- Signature: `ATTRIBUTE_NO_SANITIZE_UNDEFINED
+static const uchar * search_line_fast (...)`
+- Purpose: Fallback vectorized line scanner for supported architectures. Tries MMX, SSE, etc.
+
+** search_line_fast
+- Signature: `static const uchar * search_line_fast (...)`
+- Purpose: Fallback vectorized line scanner for supported architectures. Tries MMX, SSE, etc.
+
+** search_line_mmx
+- Signature: `static const uchar * search_line_mmx (...)`
+- Purpose: Performs vectorized scanning of input using MMX instructions.
+
+** search_line_sse2
+- Signature: `static const uchar * search_line_sse2 (...)`
+- Purpose: Performs fast input scanning using SSE2 instructions on aligned buffers.
+
+** search_line_sse42
+- Signature: `static const uchar * search_line_sse42 (...)`
+- Purpose: Uses SSE4.2 instructions (e.g., `pcmpestri`) to scan for newline and comment sequences.
+* File Scope Data Structures
+- `CPP_TOKEN_FLD_ARG_NO`
+- `CPP_TOKEN_FLD_NODE`
+- `CPP_TOKEN_FLD_NONE`
+- `CPP_TOKEN_FLD_PRAGMA`
+- `CPP_TOKEN_FLD_SOURCE`
+- `CPP_TOKEN_FLD_STR`
+- `CPP_TOKEN_FLD_TOKEN_NO`
+- `Foundation`
+- `NULL`
+- `SSE1`
+- `WARRANTY`
+- `a`
+- `accum`
+- `after_backslash`
+- `all_upper`
+- `alloced`
+- `backup`
+- `bad_string`
+- `bol`
+- `break`
+- `buffer`
+- `c`
+- `category`
+- `col`
+- `cols`
+- `combined_loc`
+- `count`
+- `data`
+- `delim_len`
+- `delimited_string`
+- `dest`
+- `dflt`
+- `done`
+- `done_comment`
+- `done_string`
+- `end`
+- `end_loc`
+- `end_offset`
+- `eol`
+- `esc`
+- `extra_len`
+- `f`
+- `fallthrough_comment`
+- `false`
+- `found`
+- `fresh_line`
+- `hash`
+- `header_count`
+- `i`
+- `impl`
+- `import`
+- `index`
+- `is_block`
+- `ix`
+- `j`
+- `l`
+- `la`
+- `len`
+- `line_count`
+- `loc`
+- `m`
+- `m_custom_label`
+- `m_kind`
+- `m_loc`
+- `m_ucn`
+- `magic`
+- `mask`
+- `maybe_number_start`
+- `minimum`
+- `misalign`
+- `module_p`
+- `n`
+- `name`
+- `new_buff`
+- `next_line`
+- `not_module`
+- `nst`
+- `num_bytes`
+- `ok`
+- `ones`
+- `orig_line`
+- `out`
+- `p`
+- `peek`
+- `peek_R`
+- `peek_u`
+- `peek_u8`
+- `peektok`
+- `prefix_len`
+- `program`
+- `ptr`
+- `quote_eight`
+- `quote_first`
+- `quote_peek`
+- `raw`
+- `read_note`
+- `repl_bs`
+- `repl_cr`
+- `repl_nl`
+- `repl_qm`
+- `restart`
+- `result`
+- `ret`
+- `room`
+- `s`
+- `saw_NUL`
+- `search`
+- `search_line_fast`
+- `second_raw`
+- `shift`
+- `si`
+- `size`
+- `skipped_white`
+- `slen`
+- `sloc`
+- `slow_path`
+- `software`
+- `spell_ident`
+- `spelling`
+- `src_loc`
+- `src_range`
+- `star`
+- `start`
+- `start_loc`
+- `start_offset`
+- `sv`
+- `sz`
+- `t`
+- `terminator`
+- `tok_range`
+- `true`
+- `type`
+- `ucn_len`
+- `ucn_len_c`
+- `update_tokens_line`
+- `utf32`
+- `utf8_signifier`
+- `utf8_start`
+- `v`
+- `want_number`
+- `warn_bidi`
+- `warn_bidi_p`
+- `was`
+- `word_type`
+- `ws`
+- `xmask`
+- `zero`
+
+* Static Functions
+- `void add_line_note (...)`
+- `int skip_line_comment (...)`
+- `void skip_whitespace (...)`
+- `void lex_string (...)`
+- `void save_comment (...)`
+- `void store_comment (...)`
+- `void create_literal (...)`
+- `bool warn_in_comment (...)`
+- `int name_p (...)`
+- `void add_line_note (...)`
+- `inline word_type acc_char_mask_misalign (...)`
+- `inline word_type acc_char_replicate (...)`
+- `inline word_type acc_char_cmp (...)`
+- `inline int acc_char_index (...)`
+- `const uchar * search_line_acc_char (...)`
+- `const uchar * search_line_acc_char (...)`
+- `const uchar * search_line_fast (...)`
+- `const uchar * search_line_fast (...)`
+- `bool warn_in_comment (...)`
+- `location_t get_location_for_byte_range_in_cur_line (...)`
+- `bidi::kind get_bidi_utf8_1 (...)`
+- `bidi::kind get_bidi_utf8 (...)`
+- `bidi::kind get_bidi_ucn_1 (...)`
+- `bidi::kind get_bidi_ucn (...)`
+- `void maybe_warn_bidi_on_close (...)`
+- `void maybe_warn_bidi_on_char (...)`
+- `int skip_line_comment (...)`
+- `void skip_whitespace (...)`
+- `int name_p (...)`
+- `void warn_about_normalization (...)`
+- `bool forms_identifier_p (...)`
+- `void maybe_va_opt_error (...)`
+- `cpp_hashnode * lex_identifier_intern (...)`
+- `cpp_hashnode * lex_identifier (...)`
+- `void lex_number (...)`
+- `void create_literal (...)`
+- `bool is_macro (...)`
+- `bool is_macro_not_literal_suffix (...)`
+- `void lex_raw_string (...)`
+- `void lex_string (...)`
+- `void store_comment (...)`
+- `void save_comment (...)`
+- `bool fallthrough_comment_p (...)`
+- `tokenrun * next_tokenrun (...)`
+- `const cpp_token* _cpp_token_from_context_at (...)`
+- `void cpp_maybe_module_directive (...)`
+- `size_t utf8_to_ucn (...)`
+- `const unsigned char * cpp_digraph2name (...)`
+- `_cpp_buff * new_buff (...)`
+- `const unsigned char * do_peek_backslash (...)`
+- `const unsigned char * do_peek_next (...)`
+- `const unsigned char * do_peek_prev (...)`
+- `const unsigned char * do_peek_ident (...)`
+- `bool do_peek_module (...)`
+
+
+
+
+
#warning, #include_next, and #import are deprecated. The name is
where the extension appears to have come from. */
-#define DIRECTIVE_TABLE \
- D(define, T_DEFINE = 0, KANDR, IN_I) \
- D(include, T_INCLUDE, KANDR, INCL | EXPAND) \
- D(endif, T_ENDIF, KANDR, COND) \
- D(ifdef, T_IFDEF, KANDR, COND | IF_COND) \
- D(if, T_IF, KANDR, COND | IF_COND | EXPAND) \
- D(else, T_ELSE, KANDR, COND) \
- D(ifndef, T_IFNDEF, KANDR, COND | IF_COND) \
- D(undef, T_UNDEF, KANDR, IN_I) \
- D(line, T_LINE, KANDR, EXPAND) \
- D(elif, T_ELIF, STDC89, COND | EXPAND) \
- D(elifdef, T_ELIFDEF, STDC2X, COND | ELIFDEF) \
- D(elifndef, T_ELIFNDEF, STDC2X, COND | ELIFDEF) \
- D(error, T_ERROR, STDC89, 0) \
- D(pragma, T_PRAGMA, STDC89, IN_I) \
- D(warning, T_WARNING, EXTENSION, 0) \
- D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND) \
- D(ident, T_IDENT, EXTENSION, IN_I) \
- D(import, T_IMPORT, EXTENSION, INCL | EXPAND) /* ObjC */ \
- D(assert, T_ASSERT, EXTENSION, DEPRECATED) /* SVR4 */ \
- D(unassert, T_UNASSERT, EXTENSION, DEPRECATED) /* SVR4 */ \
- D(sccs, T_SCCS, EXTENSION, IN_I) /* SVR4? */ \
- D(assign, T_ASSIGN, EXTENSION, IN_I)
+#define DIRECTIVE_TABLE \
+ D(define ,T_DEFINE = 0 ,KANDR ,IN_I) \
+ D(include ,T_INCLUDE ,KANDR ,INCL | EXPAND) \
+ D(endif ,T_ENDIF ,KANDR ,COND) \
+ D(ifdef ,T_IFDEF ,KANDR ,COND | IF_COND) \
+ D(if ,T_IF ,KANDR ,COND | IF_COND | EXPAND) \
+ D(else ,T_ELSE ,KANDR ,COND) \
+ D(ifndef ,T_IFNDEF ,KANDR ,COND | IF_COND) \
+ D(undef ,T_UNDEF ,KANDR ,IN_I) \
+ D(line ,T_LINE ,KANDR ,EXPAND) \
+ D(elif ,T_ELIF ,STDC89 ,COND | EXPAND) \
+ D(elifdef ,T_ELIFDEF ,STDC2X ,COND | ELIFDEF) \
+ D(elifndef ,T_ELIFNDEF ,STDC2X ,COND | ELIFDEF) \
+ D(error ,T_ERROR ,STDC89 ,0) \
+ D(pragma ,T_PRAGMA ,STDC89 ,IN_I) \
+ D(warning ,T_WARNING ,EXTENSION ,0) \
+ D(include_next ,T_INCLUDE_NEXT ,EXTENSION ,INCL | EXPAND) \
+ D(ident ,T_IDENT ,EXTENSION ,IN_I) \
+ D(import ,T_IMPORT ,EXTENSION ,INCL | EXPAND) /* ObjC */ \
+ D(assert ,T_ASSERT ,EXTENSION ,DEPRECATED) /* SVR4 */ \
+ D(unassert ,T_UNASSERT ,EXTENSION ,DEPRECATED) /* SVR4 */ \
+ D(sccs ,T_SCCS ,EXTENSION ,IN_I) /* SVR4? */ \
+ D(macro ,T_MACRO ,EXTENSION ,IN_I) /* RT extension */ \
+ D(assign ,T_ASSIGN ,EXTENSION ,IN_I) /* RT extension */
+
/* #sccs is synonymous with #ident. */
#define do_sccs do_ident
//--------------------------------------------------------------------------------
+// RT extensions
+//--------------------------------------------------------------------------------
+
+//--------------------------------------------------------------------------------
+// directive `#macro`
+// #macro name (parameter [,parameter] ...) (body_expr)
+// #macro name () (body_expr)
+//
+// The body expr can be empty, but the parens remain.
+// Whitespace between name and parens, and between parens, is ignored.
+
+extern bool _cpp_create_macro (cpp_reader *pfile, cpp_hashnode *node);
+
+/* Handler for the `#macro` directive.
+
+   Reads the macro name with lex_macro_node; if no node comes back the
+   directive is abandoned.  Otherwise comment saving is set to the
+   inverse of the discard_comments_in_macro_exp option, the optional
+   before_define callback is run, and _cpp_create_macro is asked to build
+   the macro.  When that succeeds and a define callback is installed, the
+   callback is invoked with the directive line and the node.  NODE_USED is
+   always cleared on the node afterwards.  */
+static void
+do_macro (cpp_reader *pfile)
+{
+  cpp_hashnode *macro_node = lex_macro_node (pfile, true);
+
+  if (!macro_node)
+    return;
+
+  /* Comments are kept in the body only when the user has not asked for
+     them to be discarded during macro expansion.  */
+  pfile->state.save_comments
+    = ! CPP_OPTION (pfile, discard_comments_in_macro_exp);
+
+  if (pfile->cb.before_define)
+    pfile->cb.before_define (pfile);
+
+  /* _cpp_create_macro is evaluated first; the callback is only consulted
+     when creation succeeded.  */
+  if (_cpp_create_macro (pfile, macro_node) && pfile->cb.define)
+    pfile->cb.define (pfile, pfile->directive_line, macro_node);
+
+  macro_node->flags &= ~NODE_USED;
+}
-extern bool _assign_handler(cpp_reader *pfile, cpp_hashnode *node);
+//--------------------------------------------------------------------------------
+// RT extension, directive `#assign`
+
+extern bool _cpp_create_assign(cpp_reader *pfile, cpp_hashnode *node);
const char *
cpp_token_as_text(const cpp_token *token)
return buffer;
}
-static void do_assign(cpp_reader *pfile){
+cpp_hashnode *
+_cpp_lex_paren_delim_token(cpp_reader *pfile){
const cpp_token *tok = _cpp_lex_token(pfile);
+
if(tok->type != CPP_OPEN_PAREN){
cpp_error_with_line(
pfile
,"expected '(' before name ,but found: %s"
,cpp_token_as_text(tok)
);
- return;
+ return NULL;
}
tok = _cpp_lex_token(pfile);
,"expected macro name identifier ,but found: %s"
,cpp_token_as_text(tok)
);
- return;
+ return NULL;
}
+
cpp_hashnode *node = tok->val.node.node;
tok = _cpp_lex_token(pfile);
,"expected ')' after macro name ,but found: %s"
,cpp_token_as_text(tok)
);
- return;
- }
-
- if(node){
- /* If we have been requested to expand comments into macros,
- then re-enable saving of comments. */
- pfile->state.save_comments =
- ! CPP_OPTION (pfile ,discard_comments_in_macro_exp);
-
- if (pfile->cb.before_define)
- pfile->cb.before_define (pfile);
-
- if (_assign_handler (pfile ,node))
- if (pfile->cb.define)
- pfile->cb.define (pfile ,pfile->directive_line ,node);
-
- node->flags &= ~NODE_USED;
- }
-}
-
-
-#if 0
-static void
-do_assign(cpp_reader *pfile){
-
- // cpp_hashnode *node = lex_macro_node (pfile, true);
- const cpp_token *tok = _cpp_lex_token(pfile);
- if (tok->type != CPP_OPEN_PAREN) {
- cpp_error_with_line(
- pfile,
- CPP_DL_ERROR,
- tok->src_loc,
- 0,
- "expected '(' before name, but found: %s"
- cpp_token_as_text(tok);
- );
- return;
- }
-
- tok = _cpp_lex_token(pfile);
- if (tok->type != CPP_NAME) {
- cpp_error_with_line(
- pfile,
- CPP_DL_ERROR,
- tok->src_loc,
- 0,
- "expected macro name identifier, but found: type=%d text='%.*s'",
- tok->type,
- tok->val.str.len,
- tok->val.str.text
- );
- return;
- }
- cpp_hashnode *node = tok->val.node.node;
-
- tok = _cpp_lex_token(pfile);
- if (tok->type != CPP_CLOSE_PAREN) {
- cpp_error_with_line(
- pfile,
- CPP_DL_ERROR,
- tok->src_loc,
- 0,
- "expected ')' after macro name, but found: type=%d text='%.*s'",
- tok->type,
- tok->val.str.len,
- tok->val.str.text
- );
- return;
- }
-
- if (node)
- {
- /* If we have been requested to expand comments into macros,
- then re-enable saving of comments. */
- pfile->state.save_comments =
- ! CPP_OPTION (pfile, discard_comments_in_macro_exp);
-
- if (pfile->cb.before_define)
- pfile->cb.before_define (pfile);
-
- if (_assign_handler (pfile, node))
- if (pfile->cb.define)
- pfile->cb.define (pfile, pfile->directive_line, node);
-
- node->flags &= ~NODE_USED;
- }
-}
-#endif
-
-#if 0
-
-cpp_token *
-assign_get_name(cpp_reader *pfile){
- // const cpp_token *name_token = cpp_get_token(pfile);
- const cpp_token *name_token = _cpp_lex_token(pfile);
-
- cpp_warning_with_line(
- pfile,
- CPP_W_NONE,
- name_token->src_loc,
- 0,
- "3 assign name is being set to: %.*s",
- name_token->val.str.len,
- name_token->val.str.text
- );
-
- if (name_token->type != CPP_NAME) {
- cpp_error_with_line(
- pfile,
- CPP_DL_ERROR,
- name_token->src_loc,
- 0,
- "First argument to #assign must be a macro name, instead found: %.*s",
- name_token->val.str.len,
- name_token->val.str.text
- );
return NULL;
}
- // Export this into the wider context
- cpp_token *copy = (cpp_token *) _cpp_reserve_room(pfile, 0, sizeof(cpp_token));
- *copy = *name_token;
- return copy;
+ return node;
}
-static void
-do_assign(cpp_reader *pfile)
-{
- cpp_token *name_token = assign_get_name(pfile);
- if (!name_token) {
- return;
- }
+/* Handler for the `#assign` directive.
+
+   Obtains the target node from _cpp_lex_paren_delim_token and returns
+   immediately when that yields NULL.  Otherwise sets comment saving to
+   the inverse of discard_comments_in_macro_exp, runs the optional
+   before_define callback, calls _cpp_create_assign, and on success
+   invokes the optional define callback.  NODE_USED is cleared on the
+   node before returning.  */
+static void do_assign(cpp_reader *pfile){
- cpp_macro *macro = _cpp_new_macro(
- pfile,
- cmk_macro,
- _cpp_reserve_room(pfile, 0, sizeof(cpp_macro))
- );
+ cpp_hashnode *node = _cpp_lex_paren_delim_token(pfile);
+ if(!node) return;
- macro->fun_like = 0;
- macro->paramc = 0;
- macro->variadic = 0;
- macro->count = 1;
- macro->used = 1;
+ /* If we have been requested to expand comments into macros,
+ then re-enable saving of comments. */
+ pfile->state.save_comments =
+ ! CPP_OPTION (pfile ,discard_comments_in_macro_exp);
- cpp_token *value_token = &macro->exp.tokens[0];
- value_token->type = CPP_NUMBER;
- value_token->val.str.text = (const unsigned char *) "42";
- value_token->val.str.len = 2;
- value_token->flags = 0;
+ if (pfile->cb.before_define)
+ pfile->cb.before_define (pfile);
- cpp_hashnode *node = name_token->val.node.node;
- node->type = NT_USER_MACRO;
- node->value.macro = macro;
+ if (_cpp_create_assign (pfile ,node))
+ if (pfile->cb.define)
+ pfile->cb.define (pfile ,pfile->directive_line ,node);
- _cpp_mark_macro_used(node);
- cpp_warning(pfile, CPP_W_NONE, "Assigned macro %s as 42", NODE_NAME(node));
+ node->flags &= ~NODE_USED;
+}
-#endif
+
return pfile->macro_buffer;
}
+
+//--------------------------------------------------------------------------------
+// RT extensions
+//--------------------------------------------------------------------------------
+
+// see directives.cc
+extern const char *cpp_token_as_text(const cpp_token *token);
+
+/* Debugging helper for probing where the parse currently stands.
+
+   Lexes one token with _cpp_lex_direct, reports its spelling through
+   cpp_error_with_line at CPP_DL_ERROR severity (located at the token's
+   src_loc), and then calls _cpp_backup_tokens (pfile, 1).
+   NOTE(review): the report is issued as an error, so this is presumably
+   intended for temporary instrumentation only -- confirm before leaving
+   calls to it in place.  */
+void
+debug_peek_token (cpp_reader *pfile)
+{
+  cpp_token *peeked = _cpp_lex_direct (pfile);
+  const char *spelling = cpp_token_as_text (peeked);
+
+  cpp_error_with_line (pfile, CPP_DL_ERROR, peeked->src_loc, 0,
+                       "DEBUG: next token is: `%s`", spelling);
+
+  _cpp_backup_tokens (pfile, 1);
+}
+
+/* Lex a macro replacement list into MACRO->exp.tokens.
+
+   Tokens are obtained one at a time from lex_expansion_token and placed
+   at MACRO->exp.tokens[MACRO->count], until a CPP_EOF token is seen.  The
+   CPP_EOF token itself is left counted in MACRO->count.
+
+   While collecting:
+   - In a function-like macro, a '#' followed by a CPP_MACRO_ARG (or by
+     __VA_OPT__ when the macro is variadic) is folded into that following
+     token, which is flagged STRINGIFY_ARG and overwrites the '#' slot.
+     Any other token after '#' is an error unless the language is CLK_ASM.
+   - A '##' that does not directly follow another '##' is dropped and
+     recorded as PASTE_LEFT on the preceding token.  A '##' that directly
+     follows another '##' is kept, counted as an extra token, and tagged
+     with its position in val.token_no.
+   - PASTE_OP_ERROR_MSG is reported (it is passed to cpp_error as the
+     format string) when '##' is the first token or when the list ends
+     right after a '##'.
+   - Each token that is not CPP_EOF is fed to a vaopt_state tracker; an
+     ERROR result from it, or an incomplete tracker at CPP_EOF, fails the
+     collection.
+
+   Returns true on success and stores the number of extra '##' tokens in
+   *NUM_EXTRA_TOKENS_OUT.  Returns false on any error, in which case
+   *NUM_EXTRA_TOKENS_OUT is not written.
+
+   NOTE(review): the cpp_macro pointer returned by lex_expansion_token is
+   assigned to the local MACRO only.  If that call can relocate the macro,
+   the caller's pointer goes stale -- confirm against lex_expansion_token
+   and the callers of this function.  */
+static bool
+collect_macro_body_tokens (cpp_reader *pfile,
+                           cpp_macro *macro,
+                           unsigned int *num_extra_tokens_out,
+                           const char *paste_op_error_msg)
+{
+  bool following_paste_op = false;
+  unsigned int num_extra_tokens = 0;
+
+  for (vaopt_state vaopt_tracker (pfile, macro->variadic, NULL);; )
+    {
+      /* Fetch the next body token; MACRO may be replaced by the call.  */
+      macro = lex_expansion_token (pfile, macro);
+      cpp_token *token = &macro->exp.tokens[macro->count++];
+
+      /* Stringification: '#' must be followed by a parameter.  */
+      if (macro->count > 1 && token[-1].type == CPP_HASH && macro->fun_like)
+        {
+          if (token->type == CPP_MACRO_ARG
+              || (macro->variadic
+                  && token->type == CPP_NAME
+                  && token->val.node.node == pfile->spec_nodes.n__VA_OPT__))
+            {
+              /* Remember the operand's own spacing/digraph spelling, then
+                 let it inherit the '#' token's leading whitespace and take
+                 over the '#' slot.  */
+              if (token->flags & PREV_WHITE)
+                token->flags |= SP_PREV_WHITE;
+              if (token[-1].flags & DIGRAPH)
+                token->flags |= SP_DIGRAPH;
+              token->flags &= ~PREV_WHITE;
+              token->flags |= STRINGIFY_ARG;
+              token->flags |= token[-1].flags & PREV_WHITE;
+              token[-1] = token[0];
+              macro->count--;
+            }
+          else if (CPP_OPTION (pfile, lang) != CLK_ASM)
+            {
+              cpp_error (pfile, CPP_DL_ERROR,
+                         "'#' is not followed by a macro parameter");
+              return false;
+            }
+        }
+
+      if (token->type == CPP_EOF)
+        {
+          /* The body may not end with '##'.  */
+          if (following_paste_op)
+            {
+              cpp_error (pfile, CPP_DL_ERROR, paste_op_error_msg);
+              return false;
+            }
+          if (!vaopt_tracker.completed ())
+            return false;
+          break;
+        }
+
+      if (token->type == CPP_PASTE)
+        {
+          /* The body may not start with '##'.  */
+          if (macro->count == 1)
+            {
+              cpp_error (pfile, CPP_DL_ERROR, paste_op_error_msg);
+              return false;
+            }
+
+          if (following_paste_op)
+            {
+              /* Consecutive '##': keep this one as an extra token and
+                 record where it sits.  */
+              num_extra_tokens++;
+              token->val.token_no = macro->count - 1;
+            }
+          else
+            {
+              /* Drop the '##' and mark the previous token instead.  */
+              --macro->count;
+              token[-1].flags |= PASTE_LEFT;
+              if (token->flags & DIGRAPH)
+                token[-1].flags |= SP_DIGRAPH;
+              if (token->flags & PREV_WHITE)
+                token[-1].flags |= SP_PREV_WHITE;
+            }
+          following_paste_op = true;
+        }
+      else
+        following_paste_op = false;
+
+      if (vaopt_tracker.update (token) == vaopt_state::ERROR)
+        return false;
+    }
+
+  *num_extra_tokens_out = num_extra_tokens;
+  return true;
+}
+
+
//--------------------------------------------------------------------------------
+// for `#macro` directive
+/*
+ #macro NAME ( [optional parameters] ) (body)
+ Like _cpp_create_definition, but uses paren balancing instead of requiring a single-line definition.
+*/
+
+/*
+ the cpp_macro struct is defined in cpplib.h: `struct GTY(()) cpp_macro {`
+ it has a flexible array field in a union as a last member: cpp_token tokens[1];
+*/
+
+/* Parse the operands of a `#macro` directive and build the macro.
+   Derived from create_iso_definition.
+
+   Unlike `#define`, a `#macro` is always function-like: the token that
+   follows the name must be '(' (the parameter list may be empty).  The
+   parameter list is parsed with parse_params and the body is gathered
+   with collect_macro_body_tokens.
+
+   Returns the committed macro on success and NULL on failure.  On every
+   path pfile->state.va_args_ok is cleared and _cpp_unsave_parameters is
+   called before returning.
+
+   NOTE(review): collect_macro_body_tokens takes the macro pointer by
+   value; if it can relocate the macro while lexing, the pointer used
+   below would not follow it -- confirm.  */
+static cpp_macro *
+create_iso_macro (cpp_reader *pfile)
+{
+  const char *paste_op_error_msg =
+    N_("'##' cannot appear at either end of a macro expansion");
+  unsigned int num_extra_tokens = 0;
+  unsigned nparms = 0;
+  cpp_hashnode **params = NULL;
+  bool variadic = false;
+  bool ok = false;
+  cpp_macro *macro = NULL;
+
+  /*
+    Lex one token with pfile->cur_token temporarily pointed at the local
+    `first`, then restore the saved pointer.  Presumably this makes
+    _cpp_lex_direct write the token into `first` rather than into the
+    reader's own token storage; this was inherited from
+    create_iso_definition and has been left exactly as found. -Thomas
+
+    `token` is the token that was lexed; it is checked just below.
+  */
+  cpp_token first;
+  cpp_token *saved_cur_token = pfile->cur_token;
+  pfile->cur_token = &first;
+  cpp_token *token = _cpp_lex_direct (pfile);
+  pfile->cur_token = saved_cur_token;
+
+  /*
+    - For #define, if the next token is a space, then it is not a function macro.
+    - For #macro it is always a function macro, perhaps with an empty param list.
+  */
+  if(token->type != CPP_OPEN_PAREN){
+    cpp_error_with_line(
+      pfile
+      ,CPP_DL_ERROR
+      ,token->src_loc
+      ,0
+      ,"expected '(' to open arguments list, but found: %s"
+      ,cpp_token_as_text(token)
+    );
+    goto out;
+  }
+
+  /*
+    parse_params:
+    - returns false on failure
+    - returns the parameter count via nparms
+    - returns the variadic flag via variadic
+
+    After parse_params runs, the next token returned by pfile will be
+    subsequent to the parameter list, e.g.:
+    7 | #macro Q(f ,...) printf(f ,__VA_ARGS__)
+      |                  ^~~~~~
+  */
+  if( !parse_params(pfile, &nparms, &variadic) ) goto out;
+
+  // Finalizes the reserved room; otherwise it will be reused on the next reserve room call.
+  params = (cpp_hashnode **)_cpp_commit_buff( pfile, sizeof (cpp_hashnode *) * nparms );
+  token = NULL;
+
+  // Reserves room for a new macro struct. A macro struct is variable size;
+  // the actual size is worked out when the memory is committed below.
+  macro = _cpp_new_macro(
+    pfile
+    ,cmk_macro
+    ,_cpp_reserve_room( pfile, 0, sizeof(cpp_macro) )
+  );
+  macro->variadic = variadic;
+  macro->paramc = nparms;
+  macro->parm.params = params;
+  macro->fun_like = true;
+
+  // Collects from pfile the tokens that constitute the macro body.
+  if (!collect_macro_body_tokens(pfile, macro, &num_extra_tokens, paste_op_error_msg))
+    goto out;
+
+  // The body was collected successfully.  Nothing below jumps to `out`
+  // with an error, so the macro will be returned.
+  ok = true;
+
+  /* Don't count the CPP_EOF. */
+  macro->count--;
+
+  // Commit the macro struct to memory.
+  // The struct itself has space for one token; the others run off the end.
+  macro = (cpp_macro *)_cpp_commit_buff(
+    pfile
+    ,sizeof (cpp_macro) - sizeof (cpp_token) + sizeof (cpp_token) * macro->count
+  );
+
+  /*
+    It might be that the first token of the macro body was preceded by
+    white space, so the white space flag is set. However, upon expansion,
+    there might not be a white space before said token, so the following
+    code clears the flag.
+  */
+  if (macro->count)
+    macro->exp.tokens[0].flags &= ~PREV_WHITE;
+
+  /*
+    Handles the second and subsequent ## tokens (CPP_PASTE) found in runs
+    of consecutive ## tokens:
+
+    - moves them out of the main macro body,
+    - stashes them at the end of the tokens[] array in the same memory,
+    - sets macro->extra_tokens = 1 to signal their presence.
+  */
+  if (num_extra_tokens)
+    {
+      /* Place second and subsequent ## or %:%: tokens in sequences of
+         consecutive such tokens at the end of the list to preserve
+         information about where they appear, how they are spelt and
+         whether they are preceded by whitespace without otherwise
+         interfering with macro expansion.   Remember, this is
+         extremely rare, so efficiency is not a priority.  */
+      cpp_token *temp = (cpp_token *)_cpp_reserve_room
+        (pfile, 0, num_extra_tokens * sizeof (cpp_token));
+      unsigned extra_ix = 0, norm_ix = 0;
+      cpp_token *exp = macro->exp.tokens;
+      for (unsigned ix = 0; ix != macro->count; ix++)
+        if (exp[ix].type == CPP_PASTE)
+          temp[extra_ix++] = exp[ix];
+        else
+          exp[norm_ix++] = exp[ix];
+      memcpy (&exp[norm_ix], temp, num_extra_tokens * sizeof (cpp_token));
+
+      /* Record there are extra tokens.  */
+      macro->extra_tokens = 1;
+    }
+
+ out:
+
+  /*
+    Clear the reader's va_args_ok state flag on every exit path.
+    Presumably the flag is raised while a variadic parameter list or body
+    is being parsed, and clearing it here keeps later lexing from
+    treating __VA_ARGS__ / __VA_OPT__ as permitted -- TODO confirm
+    against parse_params and vaopt_state.
+  */
+  pfile->state.va_args_ok = 0;
+
+  /*
+    Earlier we did:
+      if (!parse_params(pfile, &nparms, &variadic)) goto out;
+    This undoes the parameter bookkeeping for the nparms parameters that
+    parse_params recorded; it runs on the failure paths as well.
+  */
+  _cpp_unsave_parameters (pfile, nparms);
+
+  return ok ? macro : NULL;
+}
+
+
bool
-_assign_handler(cpp_reader *pfile, cpp_hashnode *node){
+_cpp_create_macro(cpp_reader *pfile, cpp_hashnode *node){
cpp_macro *macro;
- if (CPP_OPTION (pfile, traditional))
- macro = _cpp_create_trad_definition (pfile);
- else
- macro = create_iso_definition (pfile);
+ macro = create_iso_macro (pfile);
if (!macro)
return false;
+//--------------------------------------------------------------------------------
+// similar to _cpp_create_definition, though evaluates the body first and uses
+// paren balancing rather than requiring a single line definition.
+bool
+_cpp_create_assign(cpp_reader *pfile, cpp_hashnode *node){
+ cpp_macro *macro;
-#if 0
-static cpp_token *
-assign_name_argument(cpp_reader *pfile){
- const cpp_token *name_token = cpp_get_token(pfile);
-
- cpp_warning_with_line(
- pfile
- ,CPP_W_NONE
- ,name_token->src_loc
- ,0
- ,"for debug, assign name is being set to: %.*s"
- ,name_token->val.str.len
- ,name_token->val.str.text
- );
+ if (CPP_OPTION (pfile, traditional))
+ macro = _cpp_create_trad_definition (pfile);
+ else
+ macro = create_iso_definition (pfile);
- if(name_token->type != CPP_NAME){
- cpp_error_with_line(
- pfile
- ,CPP_DL_ERROR
- ,name_token->src_loc
- ,0
- ,"First argument to #assign must be a macro name, instead found: %.*s"
- ,name_token->val.str.len
- ,name_token->val.str.text
- );
- return NULL;
- }
+ if (!macro)
+ return false;
- // export this into the wider context
- cpp_token *copy = (cpp_token *) _cpp_reserve_room(pfile ,0 ,sizeof(cpp_token));
- *copy = *name_token;
- return copy;
-}
+ if (cpp_macro_p (node))
+ {
+ if (CPP_OPTION (pfile, warn_unused_macros))
+ _cpp_warn_if_unused_macro (pfile, node, NULL);
-void assign_handler(cpp_reader *pfile){
+ if (warn_of_redefinition (pfile, node, macro))
+ {
+ const enum cpp_warning_reason reason
+ = (cpp_builtin_macro_p (node) && !(node->flags & NODE_WARN))
+ ? CPP_W_BUILTIN_MACRO_REDEFINED : CPP_W_NONE;
- // parse name argument
- const cpp_token *name_token = assign_name_argument(pfile);
- if(!name_token) return;
+ bool warned =
+ cpp_pedwarning_with_line (pfile, reason,
+ pfile->directive_line, 0,
+ "\"%s\" redefined", NODE_NAME (node));
- // create macro
- cpp_macro *macro = _cpp_new_macro(
- pfile
- ,cmk_macro
- ,_cpp_reserve_room(pfile ,0 ,sizeof(cpp_macro))
- );
+ if (warned && cpp_user_macro_p (node))
+ cpp_error_with_line (pfile, CPP_DL_NOTE,
+ node->value.macro->line, 0,
+ "this is the location of the previous definition");
+ }
+ _cpp_free_definition (node);
+ }
- macro->fun_like = 0;
- macro->paramc = 0;
- macro->variadic = 0;
- macro->count = 1;
- macro->used = 1;
-
- // fill value
- cpp_token *value_token = &macro->exp.tokens[0];
- value_token->type = CPP_NUMBER;
- value_token->val.str.text = (const unsigned char *) "42";
- value_token->val.str.len = 2;
- value_token->flags = 0;
-
- // enter the definition into the symbol table
- cpp_hashnode *node = name_token->val.node.node;
- node->type = NT_USER_MACRO;
+ /* Enter definition in hash table. */
+ node->type = NT_USER_MACRO;
node->value.macro = macro;
+ if (! ustrncmp (NODE_NAME (node), DSC ("__STDC_"))
+ && ustrcmp (NODE_NAME (node), (const uchar *) "__STDC_FORMAT_MACROS")
+ /* __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS are mentioned
+ in the C standard, as something that one must use in C++.
+ However DR#593 and C++11 indicate that they play no role in C++.
+ We special-case them anyway. */
+ && ustrcmp (NODE_NAME (node), (const uchar *) "__STDC_LIMIT_MACROS")
+ && ustrcmp (NODE_NAME (node), (const uchar *) "__STDC_CONSTANT_MACROS"))
+ node->flags |= NODE_WARN;
- _cpp_mark_macro_used(node);
- cpp_warning(pfile ,CPP_W_NONE ,"Assigned macro %s as 42" ,NODE_NAME(node));
-}
-
-#endif
-
-#if 0
-static cpp_hashnode *
-assign_name_argument(cpp_reader *pfile){
- cpp_hashnode *node = lex_macro_node(pfile);
-
- if( !node || cpp_ide_is_keyword(node) ){
- cpp_error(pfile ,CPP_DL_ERROR ,"First argument to #assign must be a macro name");
- return NULL;
- }
+ /* If user defines one of the conditional macros, remove the
+ conditional flag */
+ node->flags &= ~NODE_CONDITIONAL;
- cpp_warning(pfile ,CPP_W_NONE ,"for debug, assign name is being set to: %s", NODE_NAME(node));
- return node;
+ return true;
}
-void
-assign_handler(cpp_reader *pfile){
-
- cpp_hashnode *node = assign_name_argument(pfile);
- if( !node )
- return; // error already reported
- // create macro
- cpp_macro *macro = _cpp_new_macro(
- pfile
- ,cmk_macro
- ,_cpp_reserve_room(pfile ,0 ,sizeof(cpp_macro))
- );
- macro->fun_like = 0;
- macro->paramc = 0;
- macro->variadic = 0;
- macro->count = 1;
- macro->used = 1;
-
- // fill value
- cpp_token *value_token = &macro->exp.tokens[0];
- value_token->type = CPP_NUMBER;
- value_token->val.str.text = (const unsigned char *) "42";
- value_token->val.str.len = 2;
- value_token->flags = 0;
-
- // install macro
- node->type = NT_USER_MACRO;
- node->value.macro = macro;
- _cpp_mark_macro_used(node);
- cpp_warning(pfile ,CPP_W_NONE ,"Assigned macro %s as 42" ,NODE_NAME(node));
-}
-#endif