From: Thomas Walker Lynch Date: Fri, 9 May 2025 09:42:08 +0000 (-0700) Subject: cp in #macro directive development X-Git-Url: https://git.reasoningtechnology.com/usr/lib/python2.7/encodings/cp1250.py?a=commitdiff_plain;h=c8a66d09d2d45faee9ba80d25e59569bb55d3245;p=RT-gcc cp in #macro directive development --- diff --git "a/document\360\237\226\211/source/lex_cc.org" "b/document\360\237\226\211/source/lex_cc.org" new file mode 100644 index 0000000..4f3c628 --- /dev/null +++ "b/document\360\237\226\211/source/lex_cc.org" @@ -0,0 +1,455 @@ +#+TITLE: lex.cc Detailed Structure and Function Index +#+Author: Caelus, code formalist (GPT-4, OpenAI), Thomas +#+Date:2025-05-09 + +* Data Structures Found in Non-Static Function Signatures +** struct context +Used in lexer or normalization stages to track state during token reclassification or Unicode normalization. + +** enum cpp_token_fld_kind +Enumeration describing the internal storage kind for a preprocessor token's value — distinguishes between identifiers, numbers, etc. + +** enum cpp_ttype +Enumeration of token types recognized by the preprocessor (e.g., identifiers, punctuators, literals, etc.). + +** struct lit_accum +Helper structure that accumulates string or character literal fragments during lexing. + +** struct normalize_state +Tracks intermediate state during Unicode normalization of identifiers or literals. + +** struct token_spelling +Structure used to store or compute the textual spelling of a token, including alternate representations (e.g., digraphs). +* Data Structures Shared Among Functions in lex.cc +** _cpp_buff +Used in: _cpp_aligned_alloc, _cpp_extend_buff, _cpp_free_buff, _cpp_get_buff, _cpp_release_buff, free, is_macro, new_buff, usage +Temporary token buffer used during macro argument collection and expansion. Shared to manage input buffering across stages. 
+ +** context +Used in: _cpp_remaining_tokens_num_in_context, character, if, maybe_warn_bidi_on_close, on_char, rich_loc +State struct used in bidirectional text normalization and context-aware lexing. Functions reference it to apply UCN and bidi safety rules. + +** cpp_hashnode +Used in: cpp_error, if, is_macro, lex_identifier, lex_identifier_intern, line, linemap_included_from +Represents identifiers and macro definitions. Shared among symbol lookup, macro parsing, and token classification functions. + +** cpp_token +Used in: RESULT, _cpp_temp_token, cpp_directive_only_process, cpp_output_line_to_string, if, line, linemap_included_from, own, return +Token structure used to represent lexed entities passed between scanners, macro collectors, and diagnostic routines. + +** cpp_ttype +Used in: is_macro, lex_string, own, return +Enumeration of token types (e.g., identifiers, keywords, operators). Shared by scanners and type-check logic to interpret input. +* Non-Static Functions +** _cpp_aligned_alloc +- Signature: `unsigned char * _cpp_aligned_alloc (...)` +- Purpose: Allocates a buffer with alignment suitable for vectorized scanning operations (e.g., SSE, AVX). + +** _cpp_append_extend_buff +- Signature: `_cpp_buff * _cpp_append_extend_buff (...)` +- Purpose: Appends additional space to an existing token buffer, used when macro expansions exceed initial estimates. + +** _cpp_clean_line +- Signature: `void _cpp_clean_line (...)` +- Purpose: Cleans lexer line state after processing a complete logical line. + +** _cpp_commit_buff +- Signature: `void * _cpp_commit_buff (...)` +- Purpose: Finalizes a temporary token buffer and returns a stable pointer to the committed data. + +** _cpp_equiv_tokens +- Signature: `int _cpp_equiv_tokens (...)` +- Purpose: Determines whether two tokens are equivalent, ignoring cosmetic differences such as spacing. 
+ +** _cpp_extend_buff +- Signature: `void _cpp_extend_buff (...)` +- Purpose: Increases the capacity of a token buffer to accommodate additional tokens during macro processing. + +** _cpp_free_buff +- Signature: `void _cpp_free_buff (...)` +- Purpose: Releases memory allocated for a temporary or committed token buffer. + +** _cpp_get_buff +- Signature: `_cpp_buff * _cpp_get_buff (...)` +- Purpose: Returns a new or recycled token buffer from the internal pool, minimizing allocations. + +** _cpp_get_fresh_line +- Signature: `bool _cpp_get_fresh_line (...)` +- Purpose: Consumes input until a logical line is ready. Handles escaped newlines. + +** _cpp_init_lexer +- Signature: `void _cpp_init_lexer (...)` +- Purpose: Initializes the core lexer state: buffers, token rings, and diagnostic counters. + +** _cpp_init_tokenrun +- Signature: `void _cpp_init_tokenrun (...)` +- Purpose: Initializes a ring buffer or region for holding tokens during lexing. + +** _cpp_lex_direct +- Signature: `cpp_token * _cpp_lex_direct (...)` +- Purpose: Lexes a single token from the input without macro expansion — used for directive parsing. + +** _cpp_lex_identifier +- Signature: `cpp_hashnode * _cpp_lex_identifier (...)` +- Purpose: Lexes an identifier and returns a hashnode for it, performing UCN expansion and keyword recognition. + +** _cpp_lex_token +- Signature: `const cpp_token * _cpp_lex_token (...)` +- Purpose: Lexes the next token from the input stream, handling macro expansion and buffering. + +** _cpp_process_line_notes +- Signature: `void _cpp_process_line_notes (...)` +- Purpose: Handles mapping #line notes and diagnostic position metadata. + +** _cpp_release_buff +- Signature: `void _cpp_release_buff (...)` +- Purpose: Returns a previously used token buffer back to the internal pool for reuse. + +** _cpp_remaining_tokens_num_in_context +- Signature: `int _cpp_remaining_tokens_num_in_context (...)` +- Purpose: Returns how many tokens are left within the current lexing context. 
+ +** _cpp_skip_block_comment +- Signature: `bool _cpp_skip_block_comment (...)` +- Purpose: Skips over block comments, optionally returning whether line state changed. + +** _cpp_spell_ident_ucns +- Signature: `unsigned char * _cpp_spell_ident_ucns (...)` +- Purpose: Generates a UTF-8 spelling for identifiers that contain Universal Character Names (UCNs). + +** _cpp_temp_token +- Signature: `cpp_token * _cpp_temp_token (...)` +- Purpose: Allocates space for a temporary token during parsing or lookahead. + +** _cpp_unaligned_alloc +- Signature: `unsigned char * _cpp_unaligned_alloc (...)` +- Purpose: Allocates unaligned memory for fallback lexers or comment scanning buffers. + +** cpp_alloc_token_string +- Signature: `const uchar * cpp_alloc_token_string (...)` +- Purpose: Allocates a fresh string buffer for a token's textual content, typically used in output or diagnostics. + +** cpp_avoid_paste +- Signature: `int cpp_avoid_paste (...)` +- Purpose: Determines whether a space is needed between two tokens to avoid unintended pasting. + +** cpp_force_token_locations +- Signature: `void cpp_force_token_locations (...)` +- Purpose: Forces the preprocessor to track source locations for all tokens, overriding lazy behavior. + +** cpp_get_comments +- Signature: `cpp_comment_table * cpp_get_comments (...)` +- Purpose: Returns a pointer to the internal comment table used for diagnostics or pretty-printing. + +** cpp_ideq +- Signature: `int cpp_ideq (...)` +- Purpose: Compares two identifiers for equality in a normalized preprocessor sense. + +** cpp_output_line +- Signature: `void cpp_output_line (...)` +- Purpose: Outputs an entire preprocessor line, including comments or tokens, to a file. + +** cpp_output_line_to_string +- Signature: `unsigned char * cpp_output_line_to_string (...)` +- Purpose: Generates a string representation of a preprocessed line for diagnostics. 
+ +** cpp_output_token +- Signature: `void cpp_output_token (...)` +- Purpose: Writes a token to an output stream, respecting spacing and formatting rules. + +** cpp_peek_token +- Signature: `const cpp_token * cpp_peek_token (...)` +- Purpose: Returns a pointer to the next token without consuming it. Used in lookahead. + +** cpp_spell_token +- Signature: `unsigned char * cpp_spell_token (...)` +- Purpose: Computes or reconstructs the text spelling of a token from internal data. + +** cpp_stop_forcing_token_locations +- Signature: `void cpp_stop_forcing_token_locations (...)` +- Purpose: Stops forcibly tracking token locations, restoring default behavior. + +** cpp_token_as_text +- Signature: `unsigned char * cpp_token_as_text (...)` +- Purpose: Converts a token into its textual representation (used for macro debug output or trace logs). + +** cpp_token_len +- Signature: `unsigned int cpp_token_len (...)` +- Purpose: Computes the length of a token for buffer management or output purposes. + +** cpp_token_val_index +- Signature: `enum cpp_token_fld_kind cpp_token_val_index (...)` +- Purpose: Returns the kind of value stored in the token (e.g., string, identifier, number). + +** cpp_type2name +- Signature: `const char * cpp_type2name (...)` +- Purpose: Maps internal token types (e.g., CPP_NUMBER) to human-readable strings like "number". + +** current_ctx +- Signature: `kind current_ctx (...)` +- Purpose: Returns the current Unicode bidirectional context (e.g., LTR, RTL) used during lexing. + +** current_ctx_loc +- Signature: `location_t current_ctx_loc (...)` +- Purpose: Returns the source location associated with the current bidi context — for diagnostics. + +** current_ctx_ucn_p +- Signature: `bool current_ctx_ucn_p (...)` +- Purpose: Returns whether the current Unicode context allows Universal Character Names (UCNs). 
+ +** init_vectorized_lexer +- Signature: `define HAVE_init_vectorized_lexer 1 +static inline void init_vectorized_lexer (...)` +- Purpose: Initializes vectorized scanning function pointers depending on CPU features. + +** on_char +- Signature: `void on_char (...)` +- Purpose: Handles logic when a character is encountered that might affect bidirectional or normalization context. + +** on_close +- Signature: `void on_close (...)` +- Purpose: Called when a bidirectional context-closing token (e.g., PDF) is encountered. + +** pop +- Signature: `void pop (...)` +- Purpose: Pops the current normalization or bidi context off the internal context stack. + +** pop_kind_at +- Signature: `kind pop_kind_at (...)` +- Purpose: Returns the kind of context that would be popped at a given depth (used for lookahead). + +** read_char +- Signature: `char read_char (...)` +- Purpose: Reads a character from the input buffer, optionally applying normalization or escaping rules. + +** search_line_fast +- Signature: `ATTRIBUTE_NO_SANITIZE_UNDEFINED +static const uchar * search_line_fast (...)` +- Purpose: Fallback vectorized line scanner for supported architectures. Tries MMX, SSE, etc. + +** search_line_fast +- Signature: `define AARCH64_MIN_PAGE_SIZE 4096 + +static const uchar * search_line_fast (...)` +- Purpose: Fallback vectorized line scanner for supported architectures. Tries MMX, SSE, etc. + +** search_line_mmx +- Signature: `endif search_line_mmx (...)` +- Purpose: Performs vectorized scanning of input using MMX instructions. + +** search_line_sse2 +- Signature: `endif search_line_sse2 (...)` +- Purpose: Performs fast input scanning using SSE2 instructions on aligned buffers. + +** search_line_sse42 +- Signature: `endif search_line_sse42 (...)` +- Purpose: Uses SSE4.2 instructions (e.g., `pcmpestri`) to scan for newline and comment sequences. 
+* File Scope Data Structures +- `CPP_TOKEN_FLD_ARG_NO` +- `CPP_TOKEN_FLD_NODE` +- `CPP_TOKEN_FLD_NONE` +- `CPP_TOKEN_FLD_PRAGMA` +- `CPP_TOKEN_FLD_SOURCE` +- `CPP_TOKEN_FLD_STR` +- `CPP_TOKEN_FLD_TOKEN_NO` +- `Foundation` +- `NULL` +- `SSE1` +- `WARRANTY` +- `a` +- `accum` +- `after_backslash` +- `all_upper` +- `alloced` +- `backup` +- `bad_string` +- `bol` +- `break` +- `buffer` +- `c` +- `category` +- `col` +- `cols` +- `combined_loc` +- `count` +- `data` +- `delim_len` +- `delimited_string` +- `dest` +- `dflt` +- `done` +- `done_comment` +- `done_string` +- `end` +- `end_loc` +- `end_offset` +- `eol` +- `esc` +- `extra_len` +- `f` +- `fallthrough_comment` +- `false` +- `found` +- `fresh_line` +- `hash` +- `header_count` +- `i` +- `impl` +- `import` +- `index` +- `is_block` +- `ix` +- `j` +- `l` +- `la` +- `len` +- `line_count` +- `loc` +- `m` +- `m_custom_label` +- `m_kind` +- `m_loc` +- `m_ucn` +- `magic` +- `mask` +- `maybe_number_start` +- `minimum` +- `misalign` +- `module_p` +- `n` +- `name` +- `new_buff` +- `next_line` +- `not_module` +- `nst` +- `num_bytes` +- `ok` +- `ones` +- `orig_line` +- `out` +- `p` +- `peek` +- `peek_R` +- `peek_u` +- `peek_u8` +- `peektok` +- `prefix_len` +- `program` +- `ptr` +- `quote_eight` +- `quote_first` +- `quote_peek` +- `raw` +- `read_note` +- `repl_bs` +- `repl_cr` +- `repl_nl` +- `repl_qm` +- `restart` +- `result` +- `ret` +- `room` +- `s` +- `saw_NUL` +- `search` +- `search_line_fast` +- `second_raw` +- `shift` +- `si` +- `size` +- `skipped_white` +- `slen` +- `sloc` +- `slow_path` +- `software` +- `spell_ident` +- `spelling` +- `src_loc` +- `src_range` +- `star` +- `start` +- `start_loc` +- `start_offset` +- `sv` +- `sz` +- `t` +- `terminator` +- `tok_range` +- `true` +- `type` +- `ucn_len` +- `ucn_len_c` +- `update_tokens_line` +- `utf32` +- `utf8_signifier` +- `utf8_start` +- `v` +- `want_number` +- `warn_bidi` +- `warn_bidi_p` +- `was` +- `word_type` +- `ws` +- `xmask` +- `zero` + +* Static Functions +- `void 
add_line_note (...)` +- `int skip_line_comment (...)` +- `void skip_whitespace (...)` +- `void lex_string (...)` +- `void save_comment (...)` +- `void store_comment (...)` +- `void create_literal (...)` +- `bool warn_in_comment (...)` +- `int name_p (...)` +- `void add_line_note (...)` +- `inline word_type acc_char_mask_misalign (...)` +- `inline word_type acc_char_replicate (...)` +- `inline word_type acc_char_cmp (...)` +- `inline int acc_char_index (...)` +- `const uchar * search_line_acc_char (...)` +- `const uchar * search_line_acc_char (...)` +- `const uchar * search_line_fast (...)` +- `const uchar * search_line_fast (...)` +- `bool warn_in_comment (...)` +- `location_t get_location_for_byte_range_in_cur_line (...)` +- `bidi::kind get_bidi_utf8_1 (...)` +- `bidi::kind get_bidi_utf8 (...)` +- `bidi::kind get_bidi_ucn_1 (...)` +- `bidi::kind get_bidi_ucn (...)` +- `void maybe_warn_bidi_on_close (...)` +- `void maybe_warn_bidi_on_char (...)` +- `int skip_line_comment (...)` +- `void skip_whitespace (...)` +- `int name_p (...)` +- `void warn_about_normalization (...)` +- `bool forms_identifier_p (...)` +- `void maybe_va_opt_error (...)` +- `cpp_hashnode * lex_identifier_intern (...)` +- `cpp_hashnode * lex_identifier (...)` +- `void lex_number (...)` +- `void create_literal (...)` +- `bool is_macro (...)` +- `bool is_macro_not_literal_suffix (...)` +- `void lex_raw_string (...)` +- `void lex_string (...)` +- `void store_comment (...)` +- `void save_comment (...)` +- `bool fallthrough_comment_p (...)` +- `tokenrun * next_tokenrun (...)` +- `const cpp_token* _cpp_token_from_context_at (...)` +- `void cpp_maybe_module_directive (...)` +- `size_t utf8_to_ucn (...)` +- `const unsigned char * cpp_digraph2name (...)` +- `_cpp_buff * new_buff (...)` +- `const unsigned char * do_peek_backslash (...)` +- `const unsigned char * do_peek_next (...)` +- `const unsigned char * do_peek_prev (...)` +- `const unsigned char * do_peek_ident (...)` +- `bool do_peek_module (...)` + + 
+ + + diff --git "a/script_gcc_min-12\360\237\226\211/directives.cc" "b/script_gcc_min-12\360\237\226\211/directives.cc" index 8ee29b3..db37dd5 100644 --- "a/script_gcc_min-12\360\237\226\211/directives.cc" +++ "b/script_gcc_min-12\360\237\226\211/directives.cc" @@ -143,29 +143,31 @@ static void cpp_pop_definition (cpp_reader *, struct def_pragma_macro *); #warning, #include_next, and #import are deprecated. The name is where the extension appears to have come from. */ -#define DIRECTIVE_TABLE \ - D(define, T_DEFINE = 0, KANDR, IN_I) \ - D(include, T_INCLUDE, KANDR, INCL | EXPAND) \ - D(endif, T_ENDIF, KANDR, COND) \ - D(ifdef, T_IFDEF, KANDR, COND | IF_COND) \ - D(if, T_IF, KANDR, COND | IF_COND | EXPAND) \ - D(else, T_ELSE, KANDR, COND) \ - D(ifndef, T_IFNDEF, KANDR, COND | IF_COND) \ - D(undef, T_UNDEF, KANDR, IN_I) \ - D(line, T_LINE, KANDR, EXPAND) \ - D(elif, T_ELIF, STDC89, COND | EXPAND) \ - D(elifdef, T_ELIFDEF, STDC2X, COND | ELIFDEF) \ - D(elifndef, T_ELIFNDEF, STDC2X, COND | ELIFDEF) \ - D(error, T_ERROR, STDC89, 0) \ - D(pragma, T_PRAGMA, STDC89, IN_I) \ - D(warning, T_WARNING, EXTENSION, 0) \ - D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND) \ - D(ident, T_IDENT, EXTENSION, IN_I) \ - D(import, T_IMPORT, EXTENSION, INCL | EXPAND) /* ObjC */ \ - D(assert, T_ASSERT, EXTENSION, DEPRECATED) /* SVR4 */ \ - D(unassert, T_UNASSERT, EXTENSION, DEPRECATED) /* SVR4 */ \ - D(sccs, T_SCCS, EXTENSION, IN_I) /* SVR4? 
*/ \ - D(assign, T_ASSIGN, EXTENSION, IN_I) +#define DIRECTIVE_TABLE \ + D(define ,T_DEFINE = 0 ,KANDR ,IN_I) \ + D(include ,T_INCLUDE ,KANDR ,INCL | EXPAND) \ + D(endif ,T_ENDIF ,KANDR ,COND) \ + D(ifdef ,T_IFDEF ,KANDR ,COND | IF_COND) \ + D(if ,T_IF ,KANDR ,COND | IF_COND | EXPAND) \ + D(else ,T_ELSE ,KANDR ,COND) \ + D(ifndef ,T_IFNDEF ,KANDR ,COND | IF_COND) \ + D(undef ,T_UNDEF ,KANDR ,IN_I) \ + D(line ,T_LINE ,KANDR ,EXPAND) \ + D(elif ,T_ELIF ,STDC89 ,COND | EXPAND) \ + D(elifdef ,T_ELIFDEF ,STDC2X ,COND | ELIFDEF) \ + D(elifndef ,T_ELIFNDEF ,STDC2X ,COND | ELIFDEF) \ + D(error ,T_ERROR ,STDC89 ,0) \ + D(pragma ,T_PRAGMA ,STDC89 ,IN_I) \ + D(warning ,T_WARNING ,EXTENSION ,0) \ + D(include_next ,T_INCLUDE_NEXT ,EXTENSION ,INCL | EXPAND) \ + D(ident ,T_IDENT ,EXTENSION ,IN_I) \ + D(import ,T_IMPORT ,EXTENSION ,INCL | EXPAND) /* ObjC */ \ + D(assert ,T_ASSERT ,EXTENSION ,DEPRECATED) /* SVR4 */ \ + D(unassert ,T_UNASSERT ,EXTENSION ,DEPRECATED) /* SVR4 */ \ + D(sccs ,T_SCCS ,EXTENSION ,IN_I) /* SVR4? */ \ + D(macro ,T_MACRO ,EXTENSION ,IN_I) \ + D(assign ,T_ASSIGN ,EXTENSION ,IN_I) + /* #sccs is synonymous with #ident. */ #define do_sccs do_ident @@ -2800,9 +2802,47 @@ _cpp_bracket_include(cpp_reader *pfile) //-------------------------------------------------------------------------------- +// RT extensions +//-------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------- +// directive `#macro` +// #macro name (parameter [,parameter] ...) 
(body_expr) +// #macro name () (body_expr) +// +// The body expr can be empty, but the parens remain +// Whitespace between name and parens, and between parens, is ignored + +extern bool _cpp_create_macro (cpp_reader *pfile, cpp_hashnode *node); + +static void +do_macro (cpp_reader *pfile) +{ + cpp_hashnode *node = lex_macro_node(pfile, true); + + if(node) + { + /* If we have been requested to expand comments into macros, + then re-enable saving of comments. */ + pfile->state.save_comments = + ! CPP_OPTION (pfile, discard_comments_in_macro_exp); + + if(pfile->cb.before_define) + pfile->cb.before_define (pfile); + + if( _cpp_create_macro(pfile, node) ) + if (pfile->cb.define) + pfile->cb.define (pfile, pfile->directive_line, node); + + node->flags &= ~NODE_USED; + } +} -extern bool _assign_handler(cpp_reader *pfile, cpp_hashnode *node); +//-------------------------------------------------------------------------------- +// RT extension, directive `#assign` + +extern bool _cpp_create_assign(cpp_reader *pfile, cpp_hashnode *node); const char * cpp_token_as_text(const cpp_token *token) @@ -2857,8 +2897,10 @@ cpp_token_as_text(const cpp_token *token) return buffer; } -static void do_assign(cpp_reader *pfile){ +cpp_hashnode * +_cpp_lex_paren_delim_token(cpp_reader *pfile){ const cpp_token *tok = _cpp_lex_token(pfile); + if(tok->type != CPP_OPEN_PAREN){ cpp_error_with_line( pfile @@ -2868,7 +2910,7 @@ static void do_assign(cpp_reader *pfile){ ,"expected '(' before name ,but found: %s" ,cpp_token_as_text(tok) ); - return; + return NULL; } tok = _cpp_lex_token(pfile); @@ -2881,8 +2923,9 @@ static void do_assign(cpp_reader *pfile){ ,"expected macro name identifier ,but found: %s" ,cpp_token_as_text(tok) ); - return; + return NULL; } + cpp_hashnode *node = tok->val.node.node; tok = _cpp_lex_token(pfile); @@ -2895,164 +2938,31 @@ static void do_assign(cpp_reader *pfile){ ,"expected ')' after macro name ,but found: %s" ,cpp_token_as_text(tok) ); - return; - } - - if(node){ - 
/* If we have been requested to expand comments into macros, - then re-enable saving of comments. */ - pfile->state.save_comments = - ! CPP_OPTION (pfile ,discard_comments_in_macro_exp); - - if (pfile->cb.before_define) - pfile->cb.before_define (pfile); - - if (_assign_handler (pfile ,node)) - if (pfile->cb.define) - pfile->cb.define (pfile ,pfile->directive_line ,node); - - node->flags &= ~NODE_USED; - } -} - - -#if 0 -static void -do_assign(cpp_reader *pfile){ - - // cpp_hashnode *node = lex_macro_node (pfile, true); - const cpp_token *tok = _cpp_lex_token(pfile); - if (tok->type != CPP_OPEN_PAREN) { - cpp_error_with_line( - pfile, - CPP_DL_ERROR, - tok->src_loc, - 0, - "expected '(' before name, but found: %s" - cpp_token_as_text(tok); - ); - return; - } - - tok = _cpp_lex_token(pfile); - if (tok->type != CPP_NAME) { - cpp_error_with_line( - pfile, - CPP_DL_ERROR, - tok->src_loc, - 0, - "expected macro name identifier, but found: type=%d text='%.*s'", - tok->type, - tok->val.str.len, - tok->val.str.text - ); - return; - } - cpp_hashnode *node = tok->val.node.node; - - tok = _cpp_lex_token(pfile); - if (tok->type != CPP_CLOSE_PAREN) { - cpp_error_with_line( - pfile, - CPP_DL_ERROR, - tok->src_loc, - 0, - "expected ')' after macro name, but found: type=%d text='%.*s'", - tok->type, - tok->val.str.len, - tok->val.str.text - ); - return; - } - - if (node) - { - /* If we have been requested to expand comments into macros, - then re-enable saving of comments. */ - pfile->state.save_comments = - ! 
CPP_OPTION (pfile, discard_comments_in_macro_exp); - - if (pfile->cb.before_define) - pfile->cb.before_define (pfile); - - if (_assign_handler (pfile, node)) - if (pfile->cb.define) - pfile->cb.define (pfile, pfile->directive_line, node); - - node->flags &= ~NODE_USED; - } -} -#endif - -#if 0 - -cpp_token * -assign_get_name(cpp_reader *pfile){ - // const cpp_token *name_token = cpp_get_token(pfile); - const cpp_token *name_token = _cpp_lex_token(pfile); - - cpp_warning_with_line( - pfile, - CPP_W_NONE, - name_token->src_loc, - 0, - "3 assign name is being set to: %.*s", - name_token->val.str.len, - name_token->val.str.text - ); - - if (name_token->type != CPP_NAME) { - cpp_error_with_line( - pfile, - CPP_DL_ERROR, - name_token->src_loc, - 0, - "First argument to #assign must be a macro name, instead found: %.*s", - name_token->val.str.len, - name_token->val.str.text - ); return NULL; } - // Export this into the wider context - cpp_token *copy = (cpp_token *) _cpp_reserve_room(pfile, 0, sizeof(cpp_token)); - *copy = *name_token; - return copy; + return node; } -static void -do_assign(cpp_reader *pfile) -{ - cpp_token *name_token = assign_get_name(pfile); - if (!name_token) { - return; - } +static void do_assign(cpp_reader *pfile){ - cpp_macro *macro = _cpp_new_macro( - pfile, - cmk_macro, - _cpp_reserve_room(pfile, 0, sizeof(cpp_macro)) - ); + cpp_hashnode *node = _cpp_lex_paren_delim_token(pfile); + if(!node) return; - macro->fun_like = 0; - macro->paramc = 0; - macro->variadic = 0; - macro->count = 1; - macro->used = 1; + /* If we have been requested to expand comments into macros, + then re-enable saving of comments. */ + pfile->state.save_comments = + ! 
CPP_OPTION (pfile ,discard_comments_in_macro_exp); - cpp_token *value_token = &macro->exp.tokens[0]; - value_token->type = CPP_NUMBER; - value_token->val.str.text = (const unsigned char *) "42"; - value_token->val.str.len = 2; - value_token->flags = 0; + if (pfile->cb.before_define) + pfile->cb.before_define (pfile); - cpp_hashnode *node = name_token->val.node.node; - node->type = NT_USER_MACRO; - node->value.macro = macro; + if (_cpp_create_assign (pfile ,node)) + if (pfile->cb.define) + pfile->cb.define (pfile ,pfile->directive_line ,node); - _cpp_mark_macro_used(node); - cpp_warning(pfile, CPP_W_NONE, "Assigned macro %s as 42", NODE_NAME(node)); + node->flags &= ~NODE_USED; } -#endif + diff --git "a/script_gcc_min-12\360\237\226\211/macro.cc" "b/script_gcc_min-12\360\237\226\211/macro.cc" index f12b3e5..82d8b4b 100644 --- "a/script_gcc_min-12\360\237\226\211/macro.cc" +++ "b/script_gcc_min-12\360\237\226\211/macro.cc" @@ -4130,16 +4130,292 @@ cpp_macro_definition (cpp_reader *pfile, cpp_hashnode *node, return pfile->macro_buffer; } + +//-------------------------------------------------------------------------------- +// RT extensions +//-------------------------------------------------------------------------------- + +// see directives.cc +extern const char *cpp_token_as_text(const cpp_token *token); + +// a helper function for probing where we are at in the parse +void +debug_peek_token (cpp_reader *pfile) +{ + cpp_token *tok = _cpp_lex_direct(pfile); + + cpp_error_with_line( + pfile, + CPP_DL_ERROR, + tok->src_loc, + 0, + "DEBUG: next token is: `%s`", + (const char *) cpp_token_as_text(tok) + ); + + _cpp_backup_tokens(pfile, 1); +} + +static bool +collect_macro_body_tokens (cpp_reader *pfile, + cpp_macro *macro, + unsigned int *num_extra_tokens_out, + const char *paste_op_error_msg) +{ + bool following_paste_op = false; + unsigned int num_extra_tokens = 0; + + for (vaopt_state vaopt_tracker (pfile, macro->variadic, NULL);; ) + { + cpp_token *token = NULL; + + 
macro = lex_expansion_token(pfile, macro); + token = &macro->exp.tokens[macro->count++]; + + if (macro->count > 1 && token[-1].type == CPP_HASH && macro->fun_like) + { + if (token->type == CPP_MACRO_ARG + || (macro->variadic + && token->type == CPP_NAME + && token->val.node.node == pfile->spec_nodes.n__VA_OPT__)) + { + if (token->flags & PREV_WHITE) + token->flags |= SP_PREV_WHITE; + if (token[-1].flags & DIGRAPH) + token->flags |= SP_DIGRAPH; + token->flags &= ~PREV_WHITE; + token->flags |= STRINGIFY_ARG; + token->flags |= token[-1].flags & PREV_WHITE; + token[-1] = token[0]; + macro->count--; + } + else if (CPP_OPTION (pfile, lang) != CLK_ASM) + { + cpp_error(pfile, CPP_DL_ERROR, + "'#' is not followed by a macro parameter"); + return false; + } + } + + if (token->type == CPP_EOF) + { + if (following_paste_op) + { + cpp_error(pfile, CPP_DL_ERROR, paste_op_error_msg); + return false; + } + if (!vaopt_tracker.completed()) + return false; + break; + } + + if (token->type == CPP_PASTE) + { + if (macro->count == 1) + { + cpp_error(pfile, CPP_DL_ERROR, paste_op_error_msg); + return false; + } + + if (following_paste_op) + { + num_extra_tokens++; + token->val.token_no = macro->count - 1; + } + else + { + --macro->count; + token[-1].flags |= PASTE_LEFT; + if (token->flags & DIGRAPH) + token[-1].flags |= SP_DIGRAPH; + if (token->flags & PREV_WHITE) + token[-1].flags |= SP_PREV_WHITE; + } + following_paste_op = true; + } + else + following_paste_op = false; + + if (vaopt_tracker.update(token) == vaopt_state::ERROR) + return false; + } + + *num_extra_tokens_out = num_extra_tokens; + return true; +} + + //-------------------------------------------------------------------------------- +// for `#macro` directive +/* + #macro NAME ( [optional parameters] ) (body) + like _cpp_create_definition, though it uses paren balancing instead of requiring a single line definition. 
+*/ + +/* + the cpp_macro struct is defined in cpplib.h: `struct GTY(()) cpp_macro {` + it has a flexible array field in a union as a last member: cpp_token tokens[1]; +*/ + +// derived from create_iso_defined +static cpp_macro * +create_iso_macro (cpp_reader *pfile) +{ + bool following_paste_op = false; + const char *paste_op_error_msg = + N_("'##' cannot appear at either end of a macro expansion"); + unsigned int num_extra_tokens = 0; + unsigned nparms = 0; + cpp_hashnode **params = NULL; + bool varadic = false; + bool ok = false; + cpp_macro *macro = NULL; + + /* + -Saves token allocation address held in pfile->cur_token. + -Gives a new token allocation address to pfile->cur_token, that of cpp_token first. + + Neither `first` nor `saved_cur_token` are referred to again, but as I don't have a + full test bench, I will leave this as I found it. Perhaps in the future if someone + understands what this is for, they can replace this comment. -Thomas + + -Parses out a token called 'token'. 'token' does get used. + */ + cpp_token first; + cpp_token *saved_cur_token = pfile->cur_token; + pfile->cur_token = &first; + cpp_token *token = _cpp_lex_direct (pfile); + pfile->cur_token = saved_cur_token; + + /* + -For #define if the next token is a space, then it is not a function macro. + -For #macro it is always a function macro, perhaps with an empty param list. + */ + if(token->type != CPP_OPEN_PAREN){ + cpp_error_with_line( + pfile + ,CPP_DL_ERROR + ,token->src_loc + ,0 + ,"expected '(' to open arguments list, but found: %s" + ,cpp_token_as_text(token) + ); + goto out; + } + + /* + - returns parameter list for a function macro, or NULL + - returns via &arg count of parameters + - returns via &arg the varadic flag + + after parse_parms runs, the next token returned by pfile will be subsequent to the parameter list, e.g.: + 7 | #macro Q(f ,...) 
printf(f ,__VA_ARGS__) + | ^~~~~~ + + */ + if( !parse_params(pfile, &nparms, &varadic) ) goto out; + + // finalizes the reserved room, otherwise it will be reused on the next reserve room call. + params = (cpp_hashnode **)_cpp_commit_buff( pfile, sizeof (cpp_hashnode *) * nparms ); + token = NULL; + + // This reserves room for a new macro struct. A macro struct is variable size, the actual size will be worked out when the memory is committed. + macro = _cpp_new_macro( + pfile + ,cmk_macro + ,_cpp_reserve_room( pfile, 0, sizeof(cpp_macro) ) + ); + macro->variadic = varadic; + macro->paramc = nparms; + macro->parm.params = params; + macro->fun_like = true; + + // collects from pfile the tokens that constitute the macro body + if (!collect_macro_body_tokens(pfile, macro, &num_extra_tokens, paste_op_error_msg)) + goto out; + + // The body tokens were collected without error (any failure above jumps to `out` with ok still false), so from here on the macro will be returned. It has not been committed yet; that happens at the _cpp_commit_buff call below. + ok = true; + + /* Don't count the CPP_EOF. */ + macro->count--; + + // commit the cpp struct to memory + // the struct reserves space for one token, the others run off the end + macro = (cpp_macro *)_cpp_commit_buff( + pfile + ,sizeof (cpp_macro) - sizeof (cpp_token) + sizeof (cpp_token) * macro->count + ); + + + /* + It might be that the first token of the macro body was preceded by white space, so + the white space flag is set. However, upon expansion, there might not be a white + space before said token, so the following code clears the flag. + */ + if (macro->count) + macro->exp.tokens[0].flags &= ~PREV_WHITE; + + /* + Identifies consecutive ## tokens (a.k.a. CPP_PASTE) that were invalid or ambiguous, + + Removes them from the main macro body, + + Stashes them at the end of the tokens[] array in the same memory, + + Sets macro->extra_tokens = 1 to signal their presence. 
+ */ + if (num_extra_tokens) + { + /* Place second and subsequent ## or %:%: tokens in sequences of + consecutive such tokens at the end of the list to preserve + information about where they appear, how they are spelt and + whether they are preceded by whitespace without otherwise + interfering with macro expansion. Remember, this is + extremely rare, so efficiency is not a priority. */ + cpp_token *temp = (cpp_token *)_cpp_reserve_room + (pfile, 0, num_extra_tokens * sizeof (cpp_token)); + unsigned extra_ix = 0, norm_ix = 0; + cpp_token *exp = macro->exp.tokens; + for (unsigned ix = 0; ix != macro->count; ix++) + if (exp[ix].type == CPP_PASTE) + temp[extra_ix++] = exp[ix]; + else + exp[norm_ix++] = exp[ix]; + memcpy (&exp[norm_ix], temp, num_extra_tokens * sizeof (cpp_token)); + + /* Record there are extra tokens. */ + macro->extra_tokens = 1; + } + + out: + + /* + - This resets a flag in the parser’s state machine, pfile. + - The field `va_args_ok` tracks whether the current macro body is allowed to reference `__VA_ARGS__` (or more precisely, `__VA_OPT__`). + - It's set **while parsing a macro body** that might use variadic logic — particularly in `vaopt_state` tracking. + + Resetting it here ensures that future macros aren't accidentally parsed under the assumption that variadic substitution is valid. + */ + pfile->state.va_args_ok = 0; + + /* + Earlier we did: + if (!parse_params(pfile, &nparms, &variadic)) goto out; + This cleans up temporary memory used by parse_params. + */ + _cpp_unsave_parameters (pfile, nparms); + + return ok ? 
macro : NULL; +} + + bool -_assign_handler(cpp_reader *pfile, cpp_hashnode *node){ +_cpp_create_macro(cpp_reader *pfile, cpp_hashnode *node){ cpp_macro *macro; - if (CPP_OPTION (pfile, traditional)) - macro = _cpp_create_trad_definition (pfile); - else - macro = create_iso_definition (pfile); + macro = create_iso_macro (pfile); if (!macro) return false; @@ -4190,125 +4466,66 @@ _assign_handler(cpp_reader *pfile, cpp_hashnode *node){ +//-------------------------------------------------------------------------------- +// similar to _cpp_create_definition, though evaluates the body first and uses +// paren balancing rather than requiring a single line definition. +bool +_cpp_create_assign(cpp_reader *pfile, cpp_hashnode *node){ + cpp_macro *macro; -#if 0 -static cpp_token * -assign_name_argument(cpp_reader *pfile){ - const cpp_token *name_token = cpp_get_token(pfile); - - cpp_warning_with_line( - pfile - ,CPP_W_NONE - ,name_token->src_loc - ,0 - ,"for debug, assign name is being set to: %.*s" - ,name_token->val.str.len - ,name_token->val.str.text - ); + if (CPP_OPTION (pfile, traditional)) + macro = _cpp_create_trad_definition (pfile); + else + macro = create_iso_definition (pfile); - if(name_token->type != CPP_NAME){ - cpp_error_with_line( - pfile - ,CPP_DL_ERROR - ,name_token->src_loc - ,0 - ,"First argument to #assign must be a macro name, instead found: %.*s" - ,name_token->val.str.len - ,name_token->val.str.text - ); - return NULL; - } + if (!macro) + return false; - // export this into the wider context - cpp_token *copy = (cpp_token *) _cpp_reserve_room(pfile ,0 ,sizeof(cpp_token)); - *copy = *name_token; - return copy; -} + if (cpp_macro_p (node)) + { + if (CPP_OPTION (pfile, warn_unused_macros)) + _cpp_warn_if_unused_macro (pfile, node, NULL); -void assign_handler(cpp_reader *pfile){ + if (warn_of_redefinition (pfile, node, macro)) + { + const enum cpp_warning_reason reason + = (cpp_builtin_macro_p (node) && !(node->flags & NODE_WARN)) + ? 
CPP_W_BUILTIN_MACRO_REDEFINED : CPP_W_NONE; - // parse name argument - const cpp_token *name_token = assign_name_argument(pfile); - if(!name_token) return; + bool warned = + cpp_pedwarning_with_line (pfile, reason, + pfile->directive_line, 0, + "\"%s\" redefined", NODE_NAME (node)); - // create macro - cpp_macro *macro = _cpp_new_macro( - pfile - ,cmk_macro - ,_cpp_reserve_room(pfile ,0 ,sizeof(cpp_macro)) - ); + if (warned && cpp_user_macro_p (node)) + cpp_error_with_line (pfile, CPP_DL_NOTE, + node->value.macro->line, 0, + "this is the location of the previous definition"); + } + _cpp_free_definition (node); + } - macro->fun_like = 0; - macro->paramc = 0; - macro->variadic = 0; - macro->count = 1; - macro->used = 1; - - // fill value - cpp_token *value_token = &macro->exp.tokens[0]; - value_token->type = CPP_NUMBER; - value_token->val.str.text = (const unsigned char *) "42"; - value_token->val.str.len = 2; - value_token->flags = 0; - - // enter the definition into the symbol table - cpp_hashnode *node = name_token->val.node.node; - node->type = NT_USER_MACRO; + /* Enter definition in hash table. */ + node->type = NT_USER_MACRO; node->value.macro = macro; + if (! ustrncmp (NODE_NAME (node), DSC ("__STDC_")) + && ustrcmp (NODE_NAME (node), (const uchar *) "__STDC_FORMAT_MACROS") + /* __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS are mentioned + in the C standard, as something that one must use in C++. + However DR#593 and C++11 indicate that they play no role in C++. + We special-case them anyway.
*/ + && ustrcmp (NODE_NAME (node), (const uchar *) "__STDC_LIMIT_MACROS") + && ustrcmp (NODE_NAME (node), (const uchar *) "__STDC_CONSTANT_MACROS")) + node->flags |= NODE_WARN; - _cpp_mark_macro_used(node); - cpp_warning(pfile ,CPP_W_NONE ,"Assigned macro %s as 42" ,NODE_NAME(node)); -} - -#endif - -#if 0 -static cpp_hashnode * -assign_name_argument(cpp_reader *pfile){ - cpp_hashnode *node = lex_macro_node(pfile); - - if( !node || cpp_ide_is_keyword(node) ){ - cpp_error(pfile ,CPP_DL_ERROR ,"First argument to #assign must be a macro name"); - return NULL; - } + /* If user defines one of the conditional macros, remove the + conditional flag */ + node->flags &= ~NODE_CONDITIONAL; - cpp_warning(pfile ,CPP_W_NONE ,"for debug, assign name is being set to: %s", NODE_NAME(node)); - return node; + return true; } -void -assign_handler(cpp_reader *pfile){ - - cpp_hashnode *node = assign_name_argument(pfile); - if( !node ) - return; // error already reported - // create macro - cpp_macro *macro = _cpp_new_macro( - pfile - ,cmk_macro - ,_cpp_reserve_room(pfile ,0 ,sizeof(cpp_macro)) - ); - macro->fun_like = 0; - macro->paramc = 0; - macro->variadic = 0; - macro->count = 1; - macro->used = 1; - - // fill value - cpp_token *value_token = &macro->exp.tokens[0]; - value_token->type = CPP_NUMBER; - value_token->val.str.text = (const unsigned char *) "42"; - value_token->val.str.len = 2; - value_token->flags = 0; - - // install macro - node->type = NT_USER_MACRO; - node->value.macro = macro; - _cpp_mark_macro_used(node); - cpp_warning(pfile ,CPP_W_NONE ,"Assigned macro %s as 42" ,NODE_NAME(node)); -} -#endif