cp in #macro directive development

author Thomas Walker Lynch <eknp9n@reasoningtechnology.com>

Fri, 9 May 2025 09:42:08 +0000 (02:42 -0700)

committer Thomas Walker Lynch <eknp9n@reasoningtechnology.com>

Fri, 9 May 2025 09:42:08 +0000 (02:42 -0700)
author Thomas Walker Lynch <eknp9n@reasoningtechnology.com>
Fri, 9 May 2025 09:42:08 +0000 (02:42 -0700)
committer Thomas Walker Lynch <eknp9n@reasoningtechnology.com>
Fri, 9 May 2025 09:42:08 +0000 (02:42 -0700)
diff --git a/document🖉/source/lex_cc.org b/document🖉/source/lex_cc.org

new file mode 100644 (file)

index 0000000..4f3c628
--- /dev/null
+++ b/document🖉/source/lex_cc.org
@@ -0,0 +1,455 @@
+#+TITLE: lex.cc Detailed Structure and Function Index
+#+Author: Caelus, code formalist (GPT-4, OpenAI), Thomas
+#+Date:2025-05-09
+
+* Data Structures Found in Non-Static Function Signatures
+** struct context
+Used in lexer or normalization stages to track state during token reclassification or Unicode normalization.
+
+** enum cpp_token_fld_kind
+Enumeration describing the internal storage kind for a preprocessor token's value — distinguishes between identifiers, numbers, etc.
+
+** enum cpp_ttype
+Enumeration of token types recognized by the preprocessor (e.g., identifiers, punctuators, literals, etc.).
+
+** struct lit_accum
+Helper structure that accumulates string or character literal fragments during lexing.
+
+** struct normalize_state
+Tracks intermediate state during Unicode normalization of identifiers or literals.
+
+** struct token_spelling
+Structure used to store or compute the textual spelling of a token, including alternate representations (e.g., digraphs).
+* Data Structures Shared Among Functions in lex.cc
+** _cpp_buff
+Used in: _cpp_aligned_alloc, _cpp_extend_buff, _cpp_free_buff, _cpp_get_buff, _cpp_release_buff, free, is_macro, new_buff, usage  
+Temporary token buffer used during macro argument collection and expansion. Shared to manage input buffering across stages.
+
+** context
+Used in: _cpp_remaining_tokens_num_in_context, character, if, maybe_warn_bidi_on_close, on_char, rich_loc  
+State struct used in bidirectional text normalization and context-aware lexing. Functions reference it to apply UCN and bidi safety rules.
+
+** cpp_hashnode
+Used in: cpp_error, if, is_macro, lex_identifier, lex_identifier_intern, line, linemap_included_from  
+Represents identifiers and macro definitions. Shared among symbol lookup, macro parsing, and token classification functions.
+
+** cpp_token
+Used in: RESULT, _cpp_temp_token, cpp_directive_only_process, cpp_output_line_to_string, if, line, linemap_included_from, own, return  
+Token structure used to represent lexed entities passed between scanners, macro collectors, and diagnostic routines.
+
+** cpp_ttype
+Used in: is_macro, lex_string, own, return  
+Enumeration of token types (e.g., identifiers, keywords, operators). Shared by scanners and type-check logic to interpret input.
+* Non-Static Functions
+** _cpp_aligned_alloc
+- Signature: `unsigned char * _cpp_aligned_alloc (...)`
+- Purpose: Allocates a buffer with alignment suitable for vectorized scanning operations (e.g., SSE, AVX).
+
+** _cpp_append_extend_buff
+- Signature: `_cpp_buff * _cpp_append_extend_buff (...)`
+- Purpose: Appends additional space to an existing token buffer, used when macro expansions exceed initial estimates.
+
+** _cpp_clean_line
+- Signature: `void _cpp_clean_line (...)`
+- Purpose: Prepares the next logical line for lexing: removes escaped newlines and (when enabled) replaces trigraphs in the buffer, recording line notes for later diagnostics.
+
+** _cpp_commit_buff
+- Signature: `void * _cpp_commit_buff (...)`
+- Purpose: Finalizes a temporary token buffer and returns a stable pointer to the committed data.
+
+** _cpp_equiv_tokens
+- Signature: `int _cpp_equiv_tokens (...)`
+- Purpose: Determines whether two tokens are equivalent, ignoring cosmetic differences such as spacing.
+
+** _cpp_extend_buff
+- Signature: `void _cpp_extend_buff (...)`
+- Purpose: Increases the capacity of a token buffer to accommodate additional tokens during macro processing.
+
+** _cpp_free_buff
+- Signature: `void _cpp_free_buff (...)`
+- Purpose: Releases memory allocated for a temporary or committed token buffer.
+
+** _cpp_get_buff
+- Signature: `_cpp_buff * _cpp_get_buff (...)`
+- Purpose: Returns a new or recycled token buffer from the internal pool, minimizing allocations.
+
+** _cpp_get_fresh_line
+- Signature: `bool _cpp_get_fresh_line (...)`
+- Purpose: Consumes input until a logical line is ready. Handles escaped newlines.
+
+** _cpp_init_lexer
+- Signature: `void _cpp_init_lexer (...)`
+- Purpose: Initializes the core lexer state: buffers, token rings, and diagnostic counters.
+
+** _cpp_init_tokenrun
+- Signature: `void _cpp_init_tokenrun (...)`
+- Purpose: Initializes a ring buffer or region for holding tokens during lexing.
+
+** _cpp_lex_direct
+- Signature: `cpp_token * _cpp_lex_direct (...)`
+- Purpose: Lexes a single token from the input without macro expansion — used for directive parsing.
+
+** _cpp_lex_identifier
+- Signature: `cpp_hashnode * _cpp_lex_identifier (...)`
+- Purpose: Lexes an identifier and returns a hashnode for it, performing UCN expansion and keyword recognition.
+
+** _cpp_lex_token
+- Signature: `const cpp_token * _cpp_lex_token (...)`
+- Purpose: Lexes the next token from the input stream, handling directives and backed-up (lookahead) tokens. It does not perform macro expansion; that is done by the caller, `cpp_get_token`.
+
+** _cpp_process_line_notes
+- Signature: `void _cpp_process_line_notes (...)`
+- Purpose: Processes the line notes recorded while cleaning the line (escaped newlines, trigraphs) up to the current position, issuing the related warnings.
+
+** _cpp_release_buff
+- Signature: `void _cpp_release_buff (...)`
+- Purpose: Returns a previously used token buffer back to the internal pool for reuse.
+
+** _cpp_remaining_tokens_num_in_context
+- Signature: `int _cpp_remaining_tokens_num_in_context (...)`
+- Purpose: Returns how many tokens are left within the current lexing context.
+
+** _cpp_skip_block_comment
+- Signature: `bool _cpp_skip_block_comment (...)`
+- Purpose: Skips over a C-style block comment; returns true if the comment was still unterminated when the end of the buffer was reached.
+
+** _cpp_spell_ident_ucns
+- Signature: `unsigned char * _cpp_spell_ident_ucns (...)`
+- Purpose: Generates a UTF-8 spelling for identifiers that contain Universal Character Names (UCNs).
+
+** _cpp_temp_token
+- Signature: `cpp_token * _cpp_temp_token (...)`
+- Purpose: Allocates space for a temporary token during parsing or lookahead.
+
+** _cpp_unaligned_alloc
+- Signature: `unsigned char * _cpp_unaligned_alloc (...)`
+- Purpose: Allocates unaligned memory for fallback lexers or comment scanning buffers.
+
+** cpp_alloc_token_string
+- Signature: `const uchar * cpp_alloc_token_string (...)`
+- Purpose: Allocates a fresh string buffer for a token's textual content, typically used in output or diagnostics.
+
+** cpp_avoid_paste
+- Signature: `int cpp_avoid_paste (...)`
+- Purpose: Determines whether a space is needed between two tokens to avoid unintended pasting.
+
+** cpp_force_token_locations
+- Signature: `void cpp_force_token_locations (...)`
+- Purpose: Forces the preprocessor to track source locations for all tokens, overriding lazy behavior.
+
+** cpp_get_comments
+- Signature: `cpp_comment_table * cpp_get_comments (...)`
+- Purpose: Returns a pointer to the internal comment table used for diagnostics or pretty-printing.
+
+** cpp_ideq
+- Signature: `int cpp_ideq (...)`
+- Purpose: Returns nonzero if the given token is an identifier (CPP_NAME) whose spelling equals the given C string.
+
+** cpp_output_line
+- Signature: `void cpp_output_line (...)`
+- Purpose: Outputs an entire preprocessor line, including comments or tokens, to a file.
+
+** cpp_output_line_to_string
+- Signature: `unsigned char * cpp_output_line_to_string (...)`
+- Purpose: Generates a string representation of a preprocessed line for diagnostics.
+
+** cpp_output_token
+- Signature: `void cpp_output_token (...)`
+- Purpose: Writes a token to an output stream, respecting spacing and formatting rules.
+
+** cpp_peek_token
+- Signature: `const cpp_token * cpp_peek_token (...)`
+- Purpose: Returns a pointer to the next token without consuming it. Used in lookahead.
+
+** cpp_spell_token
+- Signature: `unsigned char * cpp_spell_token (...)`
+- Purpose: Computes or reconstructs the text spelling of a token from internal data.
+
+** cpp_stop_forcing_token_locations
+- Signature: `void cpp_stop_forcing_token_locations (...)`
+- Purpose: Stops forcibly tracking token locations, restoring default behavior.
+
+** cpp_token_as_text
+- Signature: `unsigned char * cpp_token_as_text (...)`
+- Purpose: Converts a token into its textual representation (used for macro debug output or trace logs).
+
+** cpp_token_len
+- Signature: `unsigned int cpp_token_len (...)`
+- Purpose: Computes the length of a token for buffer management or output purposes.
+
+** cpp_token_val_index
+- Signature: `enum cpp_token_fld_kind cpp_token_val_index (...)`
+- Purpose: Returns the kind of value stored in the token (e.g., string, identifier, number).
+
+** cpp_type2name
+- Signature: `const char * cpp_type2name (...)`
+- Purpose: Maps internal token types (e.g., CPP_NUMBER) to human-readable strings like "number".
+
+** current_ctx
+- Signature: `kind current_ctx (...)`
+- Purpose: Returns the current Unicode bidirectional context (e.g., LTR, RTL) used during lexing.
+
+** current_ctx_loc
+- Signature: `location_t current_ctx_loc (...)`
+- Purpose: Returns the source location associated with the current bidi context — for diagnostics.
+
+** current_ctx_ucn_p
+- Signature: `bool current_ctx_ucn_p (...)`
+- Purpose: Returns whether the current Unicode context allows Universal Character Names (UCNs).
+
+** init_vectorized_lexer
+- Signature: `static inline void init_vectorized_lexer (...)`
+- Note: preceded in lex.cc by `#define HAVE_init_vectorized_lexer 1`; despite being listed here, the function is static.
+- Purpose: Initializes vectorized scanning function pointers depending on CPU features.
+
+** on_char
+- Signature: `void on_char (...)`
+- Purpose: Handles logic when a character is encountered that might affect bidirectional or normalization context.
+
+** on_close
+- Signature: `void on_close (...)`
+- Purpose: Called when a bidirectional context-closing token (e.g., PDF) is encountered.
+
+** pop
+- Signature: `void pop (...)`
+- Purpose: Pops the current normalization or bidi context off the internal context stack.
+
+** pop_kind_at
+- Signature: `kind pop_kind_at (...)`
+- Purpose: Returns the kind of context that would be popped at a given depth (used for lookahead).
+
+** read_char
+- Signature: `char read_char (...)`
+- Purpose: Reads a character from the input buffer, optionally applying normalization or escaping rules.
+
+** search_line_fast
+- Signature: `ATTRIBUTE_NO_SANITIZE_UNDEFINED
+static const uchar * search_line_fast (...)`
+- Purpose: Fallback vectorized line scanner for supported architectures. Tries MMX, SSE, etc.
+
+** search_line_fast
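+- Signature: `static const uchar * search_line_fast (...)`
+- Note: this definition follows `#define AARCH64_MIN_PAGE_SIZE 4096` in lex.cc,
+  so it is the AArch64 variant of the scanner; despite being listed here, it is static.
+- Purpose: Vectorized line scanner for AArch64. Finds the next character in the current line that needs special handling.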
+
+** search_line_mmx
+- Signature: `static const uchar * search_line_mmx (...)`
+- Purpose: Performs vectorized scanning of input using MMX instructions.
+
+** search_line_sse2
+- Signature: `static const uchar * search_line_sse2 (...)`
+- Purpose: Performs fast input scanning using SSE2 instructions on aligned buffers.
+
+** search_line_sse42
+- Signature: `static const uchar * search_line_sse42 (...)`
+- Purpose: Uses SSE4.2 instructions (e.g., `pcmpestri`) to scan for newline and comment sequences.
+* File Scope Data Structures
+- `CPP_TOKEN_FLD_ARG_NO`
+- `CPP_TOKEN_FLD_NODE`
+- `CPP_TOKEN_FLD_NONE`
+- `CPP_TOKEN_FLD_PRAGMA`
+- `CPP_TOKEN_FLD_SOURCE`
+- `CPP_TOKEN_FLD_STR`
+- `CPP_TOKEN_FLD_TOKEN_NO`
+- `Foundation`
+- `NULL`
+- `SSE1`
+- `WARRANTY`
+- `a`
+- `accum`
+- `after_backslash`
+- `all_upper`
+- `alloced`
+- `backup`
+- `bad_string`
+- `bol`
+- `break`
+- `buffer`
+- `c`
+- `category`
+- `col`
+- `cols`
+- `combined_loc`
+- `count`
+- `data`
+- `delim_len`
+- `delimited_string`
+- `dest`
+- `dflt`
+- `done`
+- `done_comment`
+- `done_string`
+- `end`
+- `end_loc`
+- `end_offset`
+- `eol`
+- `esc`
+- `extra_len`
+- `f`
+- `fallthrough_comment`
+- `false`
+- `found`
+- `fresh_line`
+- `hash`
+- `header_count`
+- `i`
+- `impl`
+- `import`
+- `index`
+- `is_block`
+- `ix`
+- `j`
+- `l`
+- `la`
+- `len`
+- `line_count`
+- `loc`
+- `m`
+- `m_custom_label`
+- `m_kind`
+- `m_loc`
+- `m_ucn`
+- `magic`
+- `mask`
+- `maybe_number_start`
+- `minimum`
+- `misalign`
+- `module_p`
+- `n`
+- `name`
+- `new_buff`
+- `next_line`
+- `not_module`
+- `nst`
+- `num_bytes`
+- `ok`
+- `ones`
+- `orig_line`
+- `out`
+- `p`
+- `peek`
+- `peek_R`
+- `peek_u`
+- `peek_u8`
+- `peektok`
+- `prefix_len`
+- `program`
+- `ptr`
+- `quote_eight`
+- `quote_first`
+- `quote_peek`
+- `raw`
+- `read_note`
+- `repl_bs`
+- `repl_cr`
+- `repl_nl`
+- `repl_qm`
+- `restart`
+- `result`
+- `ret`
+- `room`
+- `s`
+- `saw_NUL`
+- `search`
+- `search_line_fast`
+- `second_raw`
+- `shift`
+- `si`
+- `size`
+- `skipped_white`
+- `slen`
+- `sloc`
+- `slow_path`
+- `software`
+- `spell_ident`
+- `spelling`
+- `src_loc`
+- `src_range`
+- `star`
+- `start`
+- `start_loc`
+- `start_offset`
+- `sv`
+- `sz`
+- `t`
+- `terminator`
+- `tok_range`
+- `true`
+- `type`
+- `ucn_len`
+- `ucn_len_c`
+- `update_tokens_line`
+- `utf32`
+- `utf8_signifier`
+- `utf8_start`
+- `v`
+- `want_number`
+- `warn_bidi`
+- `warn_bidi_p`
+- `was`
+- `word_type`
+- `ws`
+- `xmask`
+- `zero`
+
+* Static Functions
+- `void add_line_note (...)`
+- `int skip_line_comment (...)`
+- `void skip_whitespace (...)`
+- `void lex_string (...)`
+- `void save_comment (...)`
+- `void store_comment (...)`
+- `void create_literal (...)`
+- `bool warn_in_comment (...)`
+- `int name_p (...)`
+- `void add_line_note (...)`
+- `inline word_type acc_char_mask_misalign (...)`
+- `inline word_type acc_char_replicate (...)`
+- `inline word_type acc_char_cmp (...)`
+- `inline int acc_char_index (...)`
+- `const uchar * search_line_acc_char (...)`
+- `const uchar * search_line_acc_char (...)`
+- `const uchar * search_line_fast (...)`
+- `const uchar * search_line_fast (...)`
+- `bool warn_in_comment (...)`
+- `location_t get_location_for_byte_range_in_cur_line (...)`
+- `bidi::kind get_bidi_utf8_1 (...)`
+- `bidi::kind get_bidi_utf8 (...)`
+- `bidi::kind get_bidi_ucn_1 (...)`
+- `bidi::kind get_bidi_ucn (...)`
+- `void maybe_warn_bidi_on_close (...)`
+- `void maybe_warn_bidi_on_char (...)`
+- `int skip_line_comment (...)`
+- `void skip_whitespace (...)`
+- `int name_p (...)`
+- `void warn_about_normalization (...)`
+- `bool forms_identifier_p (...)`
+- `void maybe_va_opt_error (...)`
+- `cpp_hashnode * lex_identifier_intern (...)`
+- `cpp_hashnode * lex_identifier (...)`
+- `void lex_number (...)`
+- `void create_literal (...)`
+- `bool is_macro (...)`
+- `bool is_macro_not_literal_suffix (...)`
+- `void lex_raw_string (...)`
+- `void lex_string (...)`
+- `void store_comment (...)`
+- `void save_comment (...)`
+- `bool fallthrough_comment_p (...)`
+- `tokenrun * next_tokenrun (...)`
+- `const cpp_token* _cpp_token_from_context_at (...)`
+- `void cpp_maybe_module_directive (...)`
+- `size_t utf8_to_ucn (...)`
+- `const unsigned char * cpp_digraph2name (...)`
+- `_cpp_buff * new_buff (...)`
+- `const unsigned char * do_peek_backslash (...)`
+- `const unsigned char * do_peek_next (...)`
+- `const unsigned char * do_peek_prev (...)`
+- `const unsigned char * do_peek_ident (...)`
+- `bool do_peek_module (...)`
+
+
+
+
+
diff --git a/script_gcc_min-12🖉/directives.cc b/script_gcc_min-12🖉/directives.cc

index 8ee29b3..db37dd5 100644 (file)
--- a/script_gcc_min-12🖉/directives.cc
+++ b/script_gcc_min-12🖉/directives.cc
@@ -143,29 +143,31 @@ static void cpp_pop_definition (cpp_reader *, struct def_pragma_macro *);
     #warning, #include_next, and #import are deprecated.  The name is
     where the extension appears to have come from.  */
  
-#define DIRECTIVE_TABLE                                                        \
-  D(define,    T_DEFINE = 0,   KANDR,     IN_I)                        \
-  D(include,   T_INCLUDE,      KANDR,     INCL | EXPAND)               \
-  D(endif,     T_ENDIF,        KANDR,     COND)                        \
-  D(ifdef,     T_IFDEF,        KANDR,     COND | IF_COND)              \
-  D(if,                T_IF,           KANDR,     COND | IF_COND | EXPAND)     \
-  D(else,      T_ELSE,         KANDR,     COND)                        \
-  D(ifndef,    T_IFNDEF,       KANDR,     COND | IF_COND)              \
-  D(undef,     T_UNDEF,        KANDR,     IN_I)                        \
-  D(line,      T_LINE,         KANDR,     EXPAND)                      \
-  D(elif,      T_ELIF,         STDC89,    COND | EXPAND)               \
-  D(elifdef,   T_ELIFDEF,      STDC2X,    COND | ELIFDEF)              \
-  D(elifndef,  T_ELIFNDEF,     STDC2X,    COND | ELIFDEF)              \
-  D(error,     T_ERROR,        STDC89,    0)                           \
-  D(pragma,    T_PRAGMA,       STDC89,    IN_I)                        \
-  D(warning,   T_WARNING,      EXTENSION, 0)                           \
-  D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND)            \
-  D(ident,     T_IDENT,        EXTENSION, IN_I)                        \
-  D(import,    T_IMPORT,       EXTENSION, INCL | EXPAND)  /* ObjC */   \
-  D(assert,    T_ASSERT,       EXTENSION, DEPRECATED)     /* SVR4 */   \
-  D(unassert,  T_UNASSERT,     EXTENSION, DEPRECATED)     /* SVR4 */   \
-  D(sccs,      T_SCCS,         EXTENSION, IN_I)           /*  SVR4? */ \
-  D(assign,    T_ASSIGN,       EXTENSION, IN_I)                
+#define DIRECTIVE_TABLE                                                    \
+  D(define        ,T_DEFINE = 0    ,KANDR       ,IN_I)                     \
+  D(include       ,T_INCLUDE       ,KANDR       ,INCL | EXPAND)            \
+  D(endif         ,T_ENDIF         ,KANDR       ,COND)                     \
+  D(ifdef         ,T_IFDEF         ,KANDR       ,COND | IF_COND)           \
+  D(if            ,T_IF            ,KANDR       ,COND | IF_COND | EXPAND)  \
+  D(else          ,T_ELSE          ,KANDR       ,COND)                     \
+  D(ifndef        ,T_IFNDEF        ,KANDR       ,COND | IF_COND)           \
+  D(undef         ,T_UNDEF         ,KANDR       ,IN_I)                     \
+  D(line          ,T_LINE          ,KANDR       ,EXPAND)                   \
+  D(elif          ,T_ELIF          ,STDC89      ,COND | EXPAND)            \
+  D(elifdef       ,T_ELIFDEF       ,STDC2X      ,COND | ELIFDEF)           \
+  D(elifndef      ,T_ELIFNDEF      ,STDC2X      ,COND | ELIFDEF)           \
+  D(error         ,T_ERROR         ,STDC89      ,0)                        \
+  D(pragma        ,T_PRAGMA        ,STDC89      ,IN_I)                     \
+  D(warning       ,T_WARNING       ,EXTENSION   ,0)                        \
+  D(include_next  ,T_INCLUDE_NEXT  ,EXTENSION   ,INCL | EXPAND)            \
+  D(ident         ,T_IDENT         ,EXTENSION   ,IN_I)                     \
+  D(import        ,T_IMPORT        ,EXTENSION   ,INCL | EXPAND) /* ObjC */ \
+  D(assert        ,T_ASSERT        ,EXTENSION   ,DEPRECATED)    /* SVR4 */ \
+  D(unassert      ,T_UNASSERT      ,EXTENSION   ,DEPRECATED)    /* SVR4 */ \
+  D(sccs          ,T_SCCS          ,EXTENSION   ,IN_I)         /* SVR4? */ \
+  D(macro         ,T_MACRO         ,EXTENSION   ,IN_I)                     \
+  D(assign        ,T_ASSIGN        ,EXTENSION   ,IN_I)
+
  
  /* #sccs is synonymous with #ident.  */
  #define do_sccs do_ident
@@ -2800,9 +2802,47 @@ _cpp_bracket_include(cpp_reader *pfile)
  
  
  //--------------------------------------------------------------------------------
+// RT extensions 
+//--------------------------------------------------------------------------------
+
+//--------------------------------------------------------------------------------
+// directive `#macro`
+//   #macro name (parameter [,parameter] ...) (body_expr)
+//   #macro name () (body_expr)
+//
+//   The body expr can be empty, but the parens remain.
+//   Whitespace between the name and the parens, and between the two paren groups, is ignored.
+
+extern bool _cpp_create_macro (cpp_reader *pfile, cpp_hashnode *node);
+
+static void
+do_macro (cpp_reader *pfile)
+{
+  cpp_hashnode *node = lex_macro_node(pfile, true);
+
+  if(node)
+    {
+      /* If we have been requested to expand comments into macros,
+        then re-enable saving of comments.  */
+      pfile->state.save_comments =
+       ! CPP_OPTION (pfile, discard_comments_in_macro_exp);
+
+      if(pfile->cb.before_define)
+       pfile->cb.before_define (pfile);
+
+      if( _cpp_create_macro(pfile, node) )
+       if (pfile->cb.define)
+         pfile->cb.define (pfile, pfile->directive_line, node);
+
+      node->flags &= ~NODE_USED;
+    }
+}
  
  
-extern bool _assign_handler(cpp_reader *pfile, cpp_hashnode *node);
+//--------------------------------------------------------------------------------
+// RT extension, directive `#assign`
+
+extern bool _cpp_create_assign(cpp_reader *pfile, cpp_hashnode *node);
  
  const char *
  cpp_token_as_text(const cpp_token *token)
@@ -2857,8 +2897,10 @@ cpp_token_as_text(const cpp_token *token)
    return buffer;
  }
  
-static void do_assign(cpp_reader *pfile){
+cpp_hashnode *
+_cpp_lex_paren_delim_token(cpp_reader *pfile){
    const cpp_token *tok = _cpp_lex_token(pfile);
+
    if(tok->type != CPP_OPEN_PAREN){
      cpp_error_with_line(
        pfile
@@ -2868,7 +2910,7 @@ static void do_assign(cpp_reader *pfile){
        ,"expected '(' before name ,but found: %s"
        ,cpp_token_as_text(tok)
      );
-    return;
+    return NULL;
    }
  
    tok = _cpp_lex_token(pfile);
@@ -2881,8 +2923,9 @@ static void do_assign(cpp_reader *pfile){
        ,"expected macro name identifier ,but found: %s"
        ,cpp_token_as_text(tok)
      );
-    return;
+    return NULL;
    }
+
    cpp_hashnode *node = tok->val.node.node;
  
    tok = _cpp_lex_token(pfile);
@@ -2895,164 +2938,31 @@ static void do_assign(cpp_reader *pfile){
        ,"expected ')' after macro name ,but found: %s"
        ,cpp_token_as_text(tok)
      );
-    return;
-  }
-
-  if(node){
-   /* If we have been requested to expand comments into macros,
-       then re-enable saving of comments.  */
-    pfile->state.save_comments =
-      ! CPP_OPTION (pfile ,discard_comments_in_macro_exp);
-
-    if (pfile->cb.before_define)
-      pfile->cb.before_define (pfile);
-
-    if (_assign_handler (pfile ,node))
-      if (pfile->cb.define)
-        pfile->cb.define (pfile ,pfile->directive_line ,node);
-
-    node->flags &= ~NODE_USED;
-  }
-}
-
-
-#if 0
-static void
-do_assign(cpp_reader *pfile){
-
-  // cpp_hashnode *node = lex_macro_node (pfile, true);
-  const cpp_token *tok = _cpp_lex_token(pfile);
-  if (tok->type != CPP_OPEN_PAREN) {
-    cpp_error_with_line(
-      pfile,
-      CPP_DL_ERROR,
-      tok->src_loc,
-      0,
-      "expected '(' before name, but found: %s"
-      cpp_token_as_text(tok);
-    );
-    return;
-  }
-
-  tok = _cpp_lex_token(pfile);
-  if (tok->type != CPP_NAME) {
-    cpp_error_with_line(
-      pfile,
-      CPP_DL_ERROR,
-      tok->src_loc,
-      0,
-      "expected macro name identifier, but found: type=%d text='%.*s'",
-      tok->type,
-      tok->val.str.len,
-      tok->val.str.text
-    );
-    return;
-  }
-  cpp_hashnode *node = tok->val.node.node;
-
-  tok = _cpp_lex_token(pfile);
-  if (tok->type != CPP_CLOSE_PAREN) {
-    cpp_error_with_line(
-      pfile,
-      CPP_DL_ERROR,
-      tok->src_loc,
-      0,
-      "expected ')' after macro name, but found: type=%d text='%.*s'",
-      tok->type,
-      tok->val.str.len,
-      tok->val.str.text
-    );
-    return;
-  }
-
-  if (node)
-    {
-      /* If we have been requested to expand comments into macros,
-        then re-enable saving of comments.  */
-      pfile->state.save_comments =
-       ! CPP_OPTION (pfile, discard_comments_in_macro_exp);
-
-      if (pfile->cb.before_define)
-       pfile->cb.before_define (pfile);
-
-      if (_assign_handler (pfile, node))
-       if (pfile->cb.define)
-         pfile->cb.define (pfile, pfile->directive_line, node);
-
-      node->flags &= ~NODE_USED;
-    }
-}
-#endif
-
-#if 0
-
-cpp_token *
-assign_get_name(cpp_reader *pfile){
-  //  const cpp_token *name_token = cpp_get_token(pfile);
-  const cpp_token *name_token = _cpp_lex_token(pfile);
-
-  cpp_warning_with_line(
-     pfile,
-     CPP_W_NONE,
-     name_token->src_loc,
-     0,
-     "3 assign name is being set to: %.*s",
-     name_token->val.str.len,
-     name_token->val.str.text
-  );
-
-  if (name_token->type != CPP_NAME) {
-    cpp_error_with_line(
-       pfile,
-       CPP_DL_ERROR,
-       name_token->src_loc,
-       0,
-       "First argument to #assign must be a macro name, instead found: %.*s",
-       name_token->val.str.len,
-       name_token->val.str.text
-    );
      return NULL;
    }
  
-  // Export this into the wider context
-  cpp_token *copy = (cpp_token *) _cpp_reserve_room(pfile, 0, sizeof(cpp_token));
-  *copy = *name_token;
-  return copy;
+  return node;
  }
  
-static void
-do_assign(cpp_reader *pfile)
-{
-  cpp_token *name_token = assign_get_name(pfile);
-  if (!name_token) {
-    return;
-  }
+static void do_assign(cpp_reader *pfile){
  
-  cpp_macro *macro = _cpp_new_macro(
-    pfile,
-    cmk_macro,
-    _cpp_reserve_room(pfile, 0, sizeof(cpp_macro))
-  );
+  cpp_hashnode *node = _cpp_lex_paren_delim_token(pfile);
+  if(!node) return;
  
-  macro->fun_like = 0;
-  macro->paramc   = 0;
-  macro->variadic = 0;
-  macro->count    = 1;
-  macro->used     = 1;
+  /* If we have been requested to expand comments into macros,
+     then re-enable saving of comments.  */
+  pfile->state.save_comments =
+    ! CPP_OPTION (pfile ,discard_comments_in_macro_exp);
  
-  cpp_token *value_token = &macro->exp.tokens[0];
-  value_token->type         = CPP_NUMBER;
-  value_token->val.str.text = (const unsigned char *) "42";
-  value_token->val.str.len  = 2;
-  value_token->flags        = 0;
+  if (pfile->cb.before_define)
+    pfile->cb.before_define (pfile);
  
-  cpp_hashnode *node = name_token->val.node.node;
-  node->type         = NT_USER_MACRO;
-  node->value.macro  = macro;
+  if (_cpp_create_assign (pfile ,node))
+    if (pfile->cb.define)
+      pfile->cb.define (pfile ,pfile->directive_line ,node);
  
-  _cpp_mark_macro_used(node);
-  cpp_warning(pfile, CPP_W_NONE, "Assigned macro %s as 42", NODE_NAME(node));
+  node->flags &= ~NODE_USED;
  
  }
  
-#endif
+
diff --git a/script_gcc_min-12🖉/macro.cc b/script_gcc_min-12🖉/macro.cc

index f12b3e5..82d8b4b 100644 (file)
--- a/script_gcc_min-12🖉/macro.cc
+++ b/script_gcc_min-12🖉/macro.cc
@@ -4130,16 +4130,292 @@ cpp_macro_definition (cpp_reader *pfile, cpp_hashnode *node,
    return pfile->macro_buffer;
  }
  
+
+//--------------------------------------------------------------------------------
+// RT extensions 
+//--------------------------------------------------------------------------------
+
+// see directives.cc
+extern const char *cpp_token_as_text(const cpp_token *token);
+
+// a helper function for probing where we are at in the parse
+void
+debug_peek_token (cpp_reader *pfile)
+{
+  cpp_token *tok = _cpp_lex_direct(pfile);
+
+  cpp_error_with_line(
+    pfile,
+    CPP_DL_ERROR,
+    tok->src_loc,
+    0,
+    "DEBUG: next token is: `%s`",
+    (const char *) cpp_token_as_text(tok)
+  );
+
+  _cpp_backup_tokens(pfile, 1);
+}
+
+static bool
+collect_macro_body_tokens (cpp_reader *pfile,
+                           cpp_macro *macro,
+                           unsigned int *num_extra_tokens_out,
+                           const char *paste_op_error_msg)
+{
+  bool following_paste_op = false;
+  unsigned int num_extra_tokens = 0;
+
+  for (vaopt_state vaopt_tracker (pfile, macro->variadic, NULL);; )
+    {
+      cpp_token *token = NULL;
+
+      macro = lex_expansion_token(pfile, macro);
+      token = &macro->exp.tokens[macro->count++];
+
+      if (macro->count > 1 && token[-1].type == CPP_HASH && macro->fun_like)
+        {
+          if (token->type == CPP_MACRO_ARG
+              || (macro->variadic
+                  && token->type == CPP_NAME
+                  && token->val.node.node == pfile->spec_nodes.n__VA_OPT__))
+            {
+              if (token->flags & PREV_WHITE)
+                token->flags |= SP_PREV_WHITE;
+              if (token[-1].flags & DIGRAPH)
+                token->flags |= SP_DIGRAPH;
+              token->flags &= ~PREV_WHITE;
+              token->flags |= STRINGIFY_ARG;
+              token->flags |= token[-1].flags & PREV_WHITE;
+              token[-1] = token[0];
+              macro->count--;
+            }
+          else if (CPP_OPTION (pfile, lang) != CLK_ASM)
+            {
+              cpp_error(pfile, CPP_DL_ERROR,
+                        "'#' is not followed by a macro parameter");
+              return false;
+            }
+        }
+
+      if (token->type == CPP_EOF)
+        {
+          if (following_paste_op)
+            {
+              cpp_error(pfile, CPP_DL_ERROR, paste_op_error_msg);
+              return false;
+            }
+          if (!vaopt_tracker.completed())
+            return false;
+          break;
+        }
+
+      if (token->type == CPP_PASTE)
+        {
+          if (macro->count == 1)
+            {
+              cpp_error(pfile, CPP_DL_ERROR, paste_op_error_msg);
+              return false;
+            }
+
+          if (following_paste_op)
+            {
+              num_extra_tokens++;
+              token->val.token_no = macro->count - 1;
+            }
+          else
+            {
+              --macro->count;
+              token[-1].flags |= PASTE_LEFT;
+              if (token->flags & DIGRAPH)
+                token[-1].flags |= SP_DIGRAPH;
+              if (token->flags & PREV_WHITE)
+                token[-1].flags |= SP_PREV_WHITE;
+            }
+          following_paste_op = true;
+        }
+      else
+        following_paste_op = false;
+
+      if (vaopt_tracker.update(token) == vaopt_state::ERROR)
+        return false;
+    }
+
+  *num_extra_tokens_out = num_extra_tokens;
+  return true;
+}
+
+
  //--------------------------------------------------------------------------------
+// for `#macro` directive
+/*
+   #macro NAME ( [optional parameters] ) (body)
+   Like _cpp_create_definition, though it uses paren balancing instead of requiring a single line definition.
+*/
+
+/*
+  the cpp_macro struct is defined in cpplib.h:  `struct GTY(()) cpp_macro {`
+  it has a flexible array field in a union as a last member: cpp_token tokens[1];
+*/
+
+// derived from create_iso_definition
+static cpp_macro *
+create_iso_macro (cpp_reader *pfile)
+{
+  bool following_paste_op = false;
+  const char *paste_op_error_msg =
+    N_("'##' cannot appear at either end of a macro expansion");
+  unsigned int num_extra_tokens = 0;
+  unsigned nparms = 0;
+  cpp_hashnode **params = NULL;
+  bool varadic = false;
+  bool ok = false;
+  cpp_macro *macro = NULL;
+
+  /* 
+    -Saves token allocation address held in pfile->cur_token.
+    -Gives a new token allocation address to pfile->cur_token, that of cpp_token first.
+
+    Neither `first` nor `saved_cur_token` are referred to again, but as I don't have a
+    full test bench, I will leave this as I found it. Perhaps in the future if someone
+    understands what this is for, they can replace this comment. -Thomas
+
+    -Parses out a token called 'token'. 'token' does get used.
+  */
+  cpp_token first;
+  cpp_token *saved_cur_token = pfile->cur_token;
+  pfile->cur_token = &first;
+  cpp_token *token = _cpp_lex_direct (pfile);
+  pfile->cur_token = saved_cur_token;
+
+  /* 
+     -For #define if the next token is a space, then it is not a function macro.
+     -For #macro it is always a function macro, perhaps with an empty param list.
+  */
+  if(token->type != CPP_OPEN_PAREN){
+    cpp_error_with_line(
+      pfile
+      ,CPP_DL_ERROR
+      ,token->src_loc
+      ,0
+      ,"expected '(' to open arguments list, but found: %s"
+      ,cpp_token_as_text(token)
+    );
+    goto out;
+  }
+
+  /*
+    - returns parameter list for a function macro, or NULL
+    - returns via &arg count of parameters
+    - returns via &arg the varadic flag
+
+    after parse_parms runs, the next token returned by pfile will be subsequent to the parameter list, e.g.:
+       7 |   #macro Q(f ,...) printf(f ,__VA_ARGS__)
+         |                    ^~~~~~
+    
+  */
+  if( !parse_params(pfile, &nparms, &varadic) ) goto out;
+
+  // finalizes the reserved room, otherwise it will be reused on the next reserve room call.
+  params = (cpp_hashnode **)_cpp_commit_buff( pfile, sizeof (cpp_hashnode *) * nparms );
+  token = NULL;
+
+  // This reserves room for a new macro struct. A macro struct is variable size, the actual size will be worked out when the memory is committed.
+  macro = _cpp_new_macro(
+    pfile
+    ,cmk_macro
+    ,_cpp_reserve_room( pfile, 0, sizeof(cpp_macro) ) 
+  );
+  macro->variadic = varadic;
+  macro->paramc = nparms;
+  macro->parm.params = params;
+  macro->fun_like = true;
+
+  // collects from pfile the tokens that constitute the macro body
+  if (!collect_macro_body_tokens(pfile, macro, &num_extra_tokens, paste_op_error_msg))
+    goto out;
+
+  // At this point, even if the body parse fails, we will say we made a macro. I'm not sure why as we haven't commited it yet, but this is what is in the code. Apparently we throw away the macro if the body does not parse.
+  ok = true;
+
+  /* Don't count the CPP_EOF.  */
+  macro->count--;
+
+  // commit the cpp struct to memory
+  // the struct reserves space for one token, the others run off the end
+  macro = (cpp_macro *)_cpp_commit_buff(
+    pfile
+   ,sizeof (cpp_macro) - sizeof (cpp_token) + sizeof (cpp_token) * macro->count
+  );
+
+
+  /*
+    It might be that the first token of the macro body was preceded by white space,so
+    the white space flag is set. However, upon expansion, there might not be a white
+    space before said token, so the following code clears the flag.
+  */
+  if (macro->count)
+    macro->exp.tokens[0].flags &= ~PREV_WHITE;
+
+  /*
+    Identifies consecutive ## tokens (a.k.a. CPP_PASTE) that were invalid or ambiguous,
+
+    Removes them from the main macro body,
+
+    Stashes them at the end of the tokens[] array in the same memory,
+
+    Sets macro->extra_tokens = 1 to signal their presence.
+  */
+  if (num_extra_tokens)
+    {
+      /* Place second and subsequent ## or %:%: tokens in sequences of
+        consecutive such tokens at the end of the list to preserve
+        information about where they appear, how they are spelt and
+        whether they are preceded by whitespace without otherwise
+        interfering with macro expansion.   Remember, this is
+        extremely rare, so efficiency is not a priority.  */
+      cpp_token *temp = (cpp_token *)_cpp_reserve_room
+       (pfile, 0, num_extra_tokens * sizeof (cpp_token));
+      unsigned extra_ix = 0, norm_ix = 0;
+      cpp_token *exp = macro->exp.tokens;
+      for (unsigned ix = 0; ix != macro->count; ix++)
+       if (exp[ix].type == CPP_PASTE)
+         temp[extra_ix++] = exp[ix];
+       else
+         exp[norm_ix++] = exp[ix];
+      memcpy (&exp[norm_ix], temp, num_extra_tokens * sizeof (cpp_token));
+
+      /* Record there are extra tokens.  */
+      macro->extra_tokens = 1;
+    }
+
+ out:
+
+  /*
+    - This resets a flag in the parser’s state machine, pfile.
+    - The field `va_args_ok` tracks whether the current macro body is allowed to reference `__VA_ARGS__` (or more precisely, `__VA_OPT__`).
+    - It's set **while parsing a macro body** that might use variadic logic — particularly in `vaopt_state` tracking.
+
+    Resetting it here ensures that future macros aren't accidentally parsed under the assumption that variadic substitution is valid.
+  */
+  pfile->state.va_args_ok = 0;
+
+  /*
+    Earlier we did:
+      if (!parse_params(pfile, &nparms, &variadic)) goto out;
+    This cleans up temporary memory used by parse_params.
+  */
+  _cpp_unsave_parameters (pfile, nparms);
+
+  return ok ? macro : NULL;
+}
+
+
  
  bool
-_assign_handler(cpp_reader *pfile, cpp_hashnode *node){
+_cpp_create_macro(cpp_reader *pfile, cpp_hashnode *node){
    cpp_macro *macro;
  
-  if (CPP_OPTION (pfile, traditional))
-    macro = _cpp_create_trad_definition (pfile);
-  else
-    macro = create_iso_definition (pfile);
+  macro = create_iso_macro (pfile);
  
    if (!macro)
      return false;
@@ -4190,125 +4466,66 @@ _assign_handler(cpp_reader *pfile, cpp_hashnode *node){
  
  
  
+//--------------------------------------------------------------------------------
+// similar to _cpp_create_definition, though evaluates the body first and uses
+// paren balancing rather than requiring a single line definition.
  
+bool
+_cpp_create_assign(cpp_reader *pfile, cpp_hashnode *node){
+  cpp_macro *macro;
  
-#if 0
-static cpp_token *
-assign_name_argument(cpp_reader *pfile){
-  const cpp_token *name_token = cpp_get_token(pfile);
-
-  cpp_warning_with_line(
-     pfile
-    ,CPP_W_NONE
-    ,name_token->src_loc
-    ,0
-    ,"for debug, assign name is being set to: %.*s"
-    ,name_token->val.str.len
-    ,name_token->val.str.text
-  );
+  if (CPP_OPTION (pfile, traditional))
+    macro = _cpp_create_trad_definition (pfile);
+  else
+    macro = create_iso_definition (pfile);
  
-  if(name_token->type != CPP_NAME){
-    cpp_error_with_line(
-       pfile
-      ,CPP_DL_ERROR
-      ,name_token->src_loc
-      ,0
-      ,"First argument to #assign must be a macro name, instead found: %.*s"
-      ,name_token->val.str.len
-      ,name_token->val.str.text
-    );
-    return NULL;
-  }
+  if (!macro)
+    return false;
  
-  // export this into the wider context
-  cpp_token *copy = (cpp_token *) _cpp_reserve_room(pfile ,0 ,sizeof(cpp_token));
-  *copy = *name_token;
-  return copy;
-}
+  if (cpp_macro_p (node))
+    {
+      if (CPP_OPTION (pfile, warn_unused_macros))
+       _cpp_warn_if_unused_macro (pfile, node, NULL);
  
-void assign_handler(cpp_reader *pfile){
+      if (warn_of_redefinition (pfile, node, macro))
+       {
+          const enum cpp_warning_reason reason
+           = (cpp_builtin_macro_p (node) && !(node->flags & NODE_WARN))
+           ? CPP_W_BUILTIN_MACRO_REDEFINED : CPP_W_NONE;
  
-  // parse name argument
-  const cpp_token *name_token = assign_name_argument(pfile);
-  if(!name_token) return; 
+         bool warned = 
+           cpp_pedwarning_with_line (pfile, reason,
+                                     pfile->directive_line, 0,
+                                     "\"%s\" redefined", NODE_NAME (node));
  
-  // create macro
-  cpp_macro *macro = _cpp_new_macro(
-     pfile
-    ,cmk_macro
-    ,_cpp_reserve_room(pfile ,0 ,sizeof(cpp_macro))
-  );
+         if (warned && cpp_user_macro_p (node))
+           cpp_error_with_line (pfile, CPP_DL_NOTE,
+                                node->value.macro->line, 0,
+                        "this is the location of the previous definition");
+       }
+      _cpp_free_definition (node);
+    }
  
-  macro->fun_like = 0;
-  macro->paramc   = 0;
-  macro->variadic = 0;
-  macro->count    = 1;
-  macro->used     = 1;
-
-  // fill value
-  cpp_token *value_token = &macro->exp.tokens[0];
-  value_token->type         = CPP_NUMBER;
-  value_token->val.str.text = (const unsigned char *) "42";
-  value_token->val.str.len  = 2;
-  value_token->flags        = 0;
-
-  // enter the definition into the symbol table
-  cpp_hashnode *node = name_token->val.node.node;
-  node->type        = NT_USER_MACRO;
+  /* Enter definition in hash table.  */
+  node->type = NT_USER_MACRO;
    node->value.macro = macro;
+  if (! ustrncmp (NODE_NAME (node), DSC ("__STDC_"))
+      && ustrcmp (NODE_NAME (node), (const uchar *) "__STDC_FORMAT_MACROS")
+      /* __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS are mentioned
+        in the C standard, as something that one must use in C++.
+        However DR#593 and C++11 indicate that they play no role in C++.
+        We special-case them anyway.  */
+      && ustrcmp (NODE_NAME (node), (const uchar *) "__STDC_LIMIT_MACROS")
+      && ustrcmp (NODE_NAME (node), (const uchar *) "__STDC_CONSTANT_MACROS"))
+    node->flags |= NODE_WARN;
  
-  _cpp_mark_macro_used(node);
-  cpp_warning(pfile ,CPP_W_NONE ,"Assigned macro %s as 42" ,NODE_NAME(node));
-}
-
-#endif
-
-#if 0
-static cpp_hashnode *
-assign_name_argument(cpp_reader *pfile){
-  cpp_hashnode *node = lex_macro_node(pfile);
-
-  if( !node || cpp_ide_is_keyword(node) ){
-    cpp_error(pfile ,CPP_DL_ERROR ,"First argument to #assign must be a macro name");
-    return NULL;
-  }
+  /* If user defines one of the conditional macros, remove the
+     conditional flag */
+  node->flags &= ~NODE_CONDITIONAL;
  
-  cpp_warning(pfile ,CPP_W_NONE ,"for debug, assign name is being set to: %s", NODE_NAME(node));
-  return node;
+  return true;
  }
  
-void
-assign_handler(cpp_reader *pfile){
-
-  cpp_hashnode *node = assign_name_argument(pfile);
-  if( !node )
-    return;  // error already reported
  
-  // create macro
-  cpp_macro *macro = _cpp_new_macro(
-     pfile
-    ,cmk_macro
-    ,_cpp_reserve_room(pfile ,0 ,sizeof(cpp_macro))
-  );
  
-  macro->fun_like = 0;
-  macro->paramc   = 0;
-  macro->variadic = 0;
-  macro->count    = 1;
-  macro->used     = 1;
-
-  // fill value
-  cpp_token *value_token = &macro->exp.tokens[0];
-  value_token->type         = CPP_NUMBER;
-  value_token->val.str.text = (const unsigned char *) "42";
-  value_token->val.str.len  = 2;
-  value_token->flags        = 0;
-
-  // install macro
-  node->type        = NT_USER_MACRO;
-  node->value.macro = macro;
  
-  _cpp_mark_macro_used(node);
-  cpp_warning(pfile ,CPP_W_NONE ,"Assigned macro %s as 42" ,NODE_NAME(node));
-}
-#endif
author	Thomas Walker Lynch <eknp9n@reasoningtechnology.com>
	Fri, 9 May 2025 09:42:08 +0000 (02:42 -0700)
committer	Thomas Walker Lynch <eknp9n@reasoningtechnology.com>
	Fri, 9 May 2025 09:42:08 +0000 (02:42 -0700)
document🖉/source/lex_cc.org	[new file with mode: 0644]	patch \| blob
script_gcc_min-12🖉/directives.cc		patch \| blob \| history
script_gcc_min-12🖉/macro.cc		patch \| blob \| history