From: Thomas Walker Lynch Date: Wed, 19 Nov 2025 14:02:35 +0000 (+0000) Subject: . X-Git-Url: https://git.reasoningtechnology.com/?a=commitdiff_plain;h=503a3b380c63928f814152207e66532f77a7fe6a;p=Harmony.git . --- diff --git a/tool/skeleton/command.py b/tool/skeleton/command.py index 47520b1..155340a 100644 --- a/tool/skeleton/command.py +++ b/tool/skeleton/command.py @@ -12,16 +12,21 @@ Commands (semantics): import: - Shell copy commands to copy: - * nodes in B that are newer than A (same relative path), and - * nodes in B that do not exist in A at all. - Direction: B → A + * in-between nodes in B that are newer than A (same relative path), or + * in-between nodes in B that do not exist in A at all. + Direction: B -> A + Also emits: + * a mkdir list (directories to create in A) + * an "other" list for type mismatches / non-file/dir nodes. export: - Shell copy commands to copy: * nodes in A that are newer than B, and * nodes in A that do not exist in B. - Direction: A → B - (Uses the "older" list: B entries older than A → copy A→B.) + Direction: A -> B + Also emits: + * a mkdir list (directories to create in B) + * an "other" list for type mismatches / non-file/dir nodes. suspicious: - Nodes in B that fall "in between" the Harmony skeleton topology: @@ -39,82 +44,13 @@ Commands (semantics): from __future__ import annotations -import meta import os from typing import Any, Dict, List, Tuple -import skeleton +import skeleton TreeDict = Dict[str, Dict[str, Any]] -def build_import_commands( - A_tree: Dict[str, Dict[str, Any]] - ,B_tree: Dict[str, Dict[str, Any]] - ,A_root: str - ,B_root: str -) -> Tuple[List[str], List[str]]: - """ - Compute shell commands to update A from B. - - Returns: - (mkdir_cmds, cp_cmds) - - Semantics: - - mkdir_cmds: - Directories that are directories in B, but are either missing - from A or not directories in A. - We *only* ever create dirs that are missing or wrong-type on A. - - - cp_cmds: - Files (and optionally other non-directory nodes) where: - * the path does not exist in A, OR - * the node in A is not a file, OR - * the B copy is newer than A (mtime comparison). - """ - mkdir_cmds: List[str] = [] - cp_cmds: List[str] = [] - - for rel_path, b_info in B_tree.items(): - node_type = b_info.get("node_type") - - # Directories: candidate for mkdir on A if missing or wrong type. - if node_type == "directory": - a_info = A_tree.get(rel_path) - if a_info is None or a_info.get("node_type") != "directory": - # Missing or not a directory on A: mkdir -p - target_dir = os.path.join(A_root, rel_path) if rel_path else A_root - mkdir_cmds.append(f"mkdir -p '{target_dir}'") - continue - - # Files / other nodes: candidate for cp from B -> A - b_mtime = b_info.get("mtime") - a_info = A_tree.get(rel_path) - - need_copy = False - - if a_info is None: - # B-only - need_copy = True - else: - a_type = a_info.get("node_type") - if a_type != "file": - # A has non-file, B has file/other: prefer B’s version - need_copy = True - else: - # Both are files: compare mtime - a_mtime = a_info.get("mtime") - if isinstance(a_mtime, (int, float)) and isinstance(b_mtime, (int, float)): - if b_mtime > a_mtime: - need_copy = True - - if need_copy: - src = os.path.join(B_root, rel_path) if rel_path else B_root - dst = A_root # cp --parents will build the path under this root - cp_cmds.append( - f"cp --parents -a '{src}' '{dst}/'" - ) - - return mkdir_cmds, cp_cmds def shell_quote( s: str @@ -167,32 +103,131 @@ def cmd_structure( # ---------------------------------------------------------------------- -# import: copy newer / A-missing nodes from B → A +# import: B -> A (mkdir, cp, and "other" list), using in_between_newer # ---------------------------------------------------------------------- -def _keys_only_in_B( - A: TreeDict - ,B: TreeDict -) -> Iterable[str]: - keys_A = set(A.keys()) - for k in B.keys(): - if k not in keys_A: - yield k +def build_import_commands( + A_tree: TreeDict + ,B_tree: TreeDict + ,A_root: str + ,B_root: str +) -> Tuple[List[str], List[str], List[str]]: + """ + Compute shell commands to update A from B. + + Returns: + (mkdir_cmds, cp_cmds, other_list) + + Semantics: + + mkdir_cmds: + - Directories that are directories in B, but are missing in A. + - We DO NOT auto-resolve type mismatches (e.g. B=directory, + A=file); those go into other_list. + + cp_cmds: + - Files where: + * the path does not exist in A, OR + * the node in A is not a file, OR + * the B copy is newer than A (mtime comparison). + - However, if A has a non-file at that path, we treat it as a + type mismatch and add that path to other_list instead of + emitting a cp command. + + other_list: + - Human-readable notes for: + * type mismatches between A and B, and + * nodes in B that are neither 'file' nor 'directory'. + """ + mkdir_cmds: List[str] = [] + cp_cmds: List[str] = [] + other_list: List[str] = [] + + for rel_path, b_info in B_tree.items(): + b_type = b_info.get("node_type") + rel_display = rel_path if rel_path else "." + + a_info = A_tree.get(rel_path) + a_type = a_info.get("node_type") if a_info is not None else "MISSING" + + # Case 1: B node is neither file nor directory -> other_list + if b_type not in ("file", "directory"): + other_list.append( + f"{rel_display}: A={a_type}, B={b_type}" + ) + continue + + # Case 2: B directory + if b_type == "directory": + if a_info is None: + # Missing in A: safe to mkdir -p + target_dir = os.path.join(A_root, rel_path) if rel_path else A_root + mkdir_cmds.append(f"mkdir -p {shell_quote(target_dir)}") + else: + # Exists in A: must also be a directory to be "structurally OK" + if a_type != "directory": + # Type mismatch: do not mkdir, just report + other_list.append( + f"{rel_display}: A={a_type}, B=directory" + ) + continue + + # Case 3: B file + # Decide whether to copy B -> A, or report conflict. + if a_info is None: + # B-only file + src = os.path.join(B_root, rel_path) if rel_path else B_root + dst = A_root + cp_cmds.append( + f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" + ) + continue + + # A has something at this path + if a_type != "file": + # Type mismatch (e.g. A=directory, B=file, or A=other) + other_list.append( + f"{rel_display}: A={a_type}, B=file" + ) + continue + + # Both files: compare mtime + a_mtime = a_info.get("mtime") + b_mtime = b_info.get("mtime") + + if isinstance(a_mtime, (int, float)) and isinstance(b_mtime, (int, float)): + if b_mtime > a_mtime: + src = os.path.join(B_root, rel_path) if rel_path else B_root + dst = A_root + cp_cmds.append( + f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" + ) + + return mkdir_cmds, cp_cmds, other_list def cmd_import( - A_tree: Dict[str, Dict[str, Any]] - ,B_tree: Dict[str, Dict[str, Any]] + A_tree: TreeDict + ,B_tree: TreeDict ,A_root: str ,B_root: str ) -> int: """ - import: show directory creation and copy commands B -> A. + import: update the skeleton (A) from the project (B), + using only in_between_newer nodes. """ - mkdir_cmds, cp_cmds = build_import_commands(A_tree, B_tree, A_root, B_root) + inb_newer = skeleton.in_between_newer(A_tree, B_tree) + + mkdir_cmds, cp_cmds, other_list = build_import_commands( + A_tree + ,inb_newer + ,A_root + ,B_root + ) - print("== import: copy from B -> A ==") + print("== import: copy from B -> A (in-between newer only) ==") print(f"# A root: {A_root}") print(f"# B root: {B_root}") + print("# Only considering in-between files that are new or absent in A.") print("#") print("# Directories to create in A (mkdir -p):") @@ -209,135 +244,157 @@ def cmd_import( print(line) else: print("# (none)") + print("#") + + print("# Nodes NOT handled automatically (type mismatches / non-file/dir):") + if other_list: + for rel in other_list: + print(f"# {rel}") + else: + print("# (none)") return 0 -def cmd_import( - A: TreeDict - ,B: TreeDict +# ---------------------------------------------------------------------- +# export: A -> B (mkdir, cp, and "other" list) +# ---------------------------------------------------------------------- +def build_export_commands( + A_tree: TreeDict + ,B_tree: TreeDict ,A_root: str ,B_root: str -) -> int: +) -> Tuple[List[str], List[str], List[str]]: """ - import: B → A + Compute shell commands to update B from A. + + Returns: + (mkdir_cmds, cp_cmds, other_list) - - Newer nodes in B than A (same path): tree_dict_newer(A,B). - - Nodes present only in B (not in A). - - Only file nodes are turned into copy commands. + Semantics: - Output: shell 'cp' commands using GNU 'cp --parents -a'. + mkdir_cmds: + - Directories that are directories in A, but are missing in B. + - Type mismatches go into other_list. + + cp_cmds: + - Files where: + * the path does not exist in B, OR + * the node in B is not a file, OR + * the A copy is newer than B (mtime comparison). + - If B has a non-file while A has a file, treat as type mismatch. + + other_list: + - Human-readable notes for: + * type mismatches between A and B, and + * nodes in A that are neither 'file' nor 'directory'. """ - newer_B = skeleton.tree_dict_newer(A, B) - only_in_B_paths = list(_keys_only_in_B(A, B)) - - # Collect unique file paths to copy from B to A - paths: List[str] = [] + mkdir_cmds: List[str] = [] + cp_cmds: List[str] = [] + other_list: List[str] = [] - for k in newer_B.keys(): - if B.get(k, {}).get("node_type") == "file": - paths.append(k) + for rel_path, a_info in A_tree.items(): + a_type = a_info.get("node_type") + rel_display = rel_path if rel_path else "." - for k in only_in_B_paths: - if B.get(k, {}).get("node_type") == "file": - paths.append(k) + b_info = B_tree.get(rel_path) + b_type = b_info.get("node_type") if b_info is not None else "MISSING" - # Deduplicate while preserving order - seen = set() - unique_paths: List[str] = [] - for p in paths: - if p in seen: + # Case 1: A node is neither file nor directory -> other_list + if a_type not in ("file", "directory"): + other_list.append( + f"{rel_display}: A={a_type}, B={b_type}" + ) continue - seen.add(p) - unique_paths.append(p) - _print_header("import: copy from B → A") + # Case 2: A directory + if a_type == "directory": + if b_info is None: + # Missing in B: safe to mkdir -p + target_dir = os.path.join(B_root, rel_path) if rel_path else B_root + mkdir_cmds.append(f"mkdir -p {shell_quote(target_dir)}") + else: + # Exists in B: must also be directory + if b_type != "directory": + other_list.append( + f"{rel_display}: A=directory, B={b_type}" + ) + continue - if not unique_paths: - print("# No file nodes in B to import into A.") - return 0 + # Case 3: A file + if b_info is None: + # A-only file + src = os.path.join(A_root, rel_path) if rel_path else A_root + dst = B_root + cp_cmds.append( + f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" + ) + continue - print(f"# A root: {A_root}") - print(f"# B root: {B_root}") - print("# Copy newer and B-only files from B into A:") - for rel in unique_paths: - src = os.path.join(B_root, rel) - cmd = ( - f"cp --parents -a {shell_quote(src)} " - f"{shell_quote(A_root)}/" - ) - print(cmd) + if b_type != "file": + other_list.append( + f"{rel_display}: A=file, B={b_type}" + ) + continue - return 0 + # Both files: compare mtime + a_mtime = a_info.get("mtime") + b_mtime = b_info.get("mtime") + if isinstance(a_mtime, (int, float)) and isinstance(b_mtime, (int, float)): + if a_mtime > b_mtime: + src = os.path.join(A_root, rel_path) if rel_path else A_root + dst = B_root + cp_cmds.append( + f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" + ) -# ---------------------------------------------------------------------- -# export: copy newer / B-missing nodes from A → B -# ---------------------------------------------------------------------- -def _keys_only_in_A( - A: TreeDict - ,B: TreeDict -) -> Iterable[str]: - keys_B = set(B.keys()) - for k in A.keys(): - if k not in keys_B: - yield k + return mkdir_cmds, cp_cmds, other_list def cmd_export( - A: TreeDict - ,B: TreeDict + A_tree: TreeDict + ,B_tree: TreeDict ,A_root: str ,B_root: str ) -> int: """ - export: A → B - - - Nodes in B that are older than A (same path): - tree_dict_older(A,B) -> keys of interest. - For these keys, we copy from A_root/path to B_root/path. - - - Nodes present only in A (not in B). - - Only file nodes are turned into copy commands. + export: show directory creation and copy commands A -> B. """ - older_B = skeleton.tree_dict_older(A, B) - only_in_A_paths = list(_keys_only_in_A(A, B)) - - paths: List[str] = [] - - for k in older_B.keys(): - if A.get(k, {}).get("node_type") == "file": - paths.append(k) - - for k in only_in_A_paths: - if A.get(k, {}).get("node_type") == "file": - paths.append(k) - - seen = set() - unique_paths: List[str] = [] - for p in paths: - if p in seen: - continue - seen.add(p) - unique_paths.append(p) + mkdir_cmds, cp_cmds, other_list = build_export_commands( + A_tree + ,B_tree + ,A_root + ,B_root + ) + + print("== export: copy from A -> B ==") + print(f"# A root: {A_root}") + print(f"# B root: {B_root}") + print("#") - _print_header("export: copy from A → B") + print("# Directories to create in B (mkdir -p):") + if mkdir_cmds: + for line in mkdir_cmds: + print(line) + else: + print("# (none)") + print("#") - if not unique_paths: - print("# No file nodes in A to export into B.") - return 0 + print("# Files to copy from A -> B (cp --parents -a):") + if cp_cmds: + for line in cp_cmds: + print(line) + else: + print("# (none)") + print("#") - print(f"# A root: {A_root}") - print(f"# B root: {B_root}") - print("# Copy newer and A-only files from A into B:") - for rel in unique_paths: - src = os.path.join(A_root, rel) - cmd = ( - f"cp --parents -a {shell_quote(src)} " - f"{shell_quote(B_root)}/" - ) - print(cmd) + print("# Nodes NOT handled automatically (type mismatches / non-file/dir):") + if other_list: + for rel in other_list: + print(f"# {rel}") + else: + print("# (none)") return 0 @@ -352,9 +409,6 @@ def cmd_suspicious( """ suspicious: nodes in B that fall 'in between' the Harmony skeleton, not under leaf directories. - - Uses tree_dict_in_between_and_below(A,B) and prints the 'in_between' - dictionary. """ in_between, _below = skeleton.tree_dict_in_between_and_below(A, B) @@ -374,9 +428,6 @@ def cmd_addendum( ) -> int: """ addendum: nodes in B that fall 'below' Harmony leaf directories. - - These represent work added in proper extension points. - Uses the 'below' part from tree_dict_in_between_and_below(A,B). """ _in_between, below = skeleton.tree_dict_in_between_and_below(A, B) @@ -413,7 +464,6 @@ def dispatch( A_root, B_root: Root paths corresponding to A and B (for copy commands). """ - # Normalize commands cmds = set(has_other_list) if "all" in cmds: @@ -425,7 +475,6 @@ def dispatch( ,"addendum" ]) - # Preserve a deterministic run order ordered = [ "structure" ,"import" @@ -451,7 +500,6 @@ def dispatch( elif name == "addendum": rc = cmd_addendum(A, B) else: - # Unknown has_other token; ignore for now, could log later. rc = 0 if rc != 0: diff --git a/tool/skeleton/meta.py b/tool/skeleton/meta.py index dee6439..fc014f6 100644 --- a/tool/skeleton/meta.py +++ b/tool/skeleton/meta.py @@ -27,7 +27,7 @@ _Z_MODULE = load_command_module("Z") # Meta module version _major = 1 -_minor = 1 +_minor = 5 def version_print() -> None: """ Print the meta module version as MAJOR.MINOR. diff --git a/tool/skeleton/skeleton.py b/tool/skeleton/skeleton.py index 4799b64..549de93 100644 --- a/tool/skeleton/skeleton.py +++ b/tool/skeleton/skeleton.py @@ -7,13 +7,16 @@ skeleton.py - helpers for working with the Harmony skeleton tree from __future__ import annotations -import meta import os import sys from typing import Any, Callable, Dict, List, Set + +import meta from GitIgnore import GitIgnore import Harmony +TreeDict = Dict[str, Dict[str, Any]] + # tree_dict_make / tree_dict_print # # Build a dictionary describing a project tree, respecting GitIgnore. @@ -28,31 +31,19 @@ import Harmony # Values are dicts with: # 1. 'mtime' : last modification time (float seconds) # 2. 'node_type' : 'file', 'directory', or 'other' -# 3. 'dir_info' : 'not-a-directory', 'leaf', 'root' +# 3. 'dir_info' : 'NA', 'leaf', 'branch', or 'root' # 4. 'checksum' : present only for file nodes when checksum_fn is # not None # # Traversal: -# - Directories whose relative path GitIgnore.check() marks as -# 'Ignore' are included in tree_dict but not traversed further. +# - Any path (directory or file) for which GitIgnore.check() +# returns 'Ignore' is omitted from the tree_dict. def tree_dict_make( path: str ,checksum_fn: Callable[[str], int] | None ) -> Dict[str, Dict[str, Any]]: """ Build a tree_dict for the subtree rooted at , respecting GitIgnore. - - Semantics (current): - * Any path (directory or file) for which GitIgnore.check() - returns 'Ignore' is completely omitted from the tree_dict. - * The root directory ('') is always included. - * Directory dir_info: - - 'root' for the root - - 'branch' for directories that have child directories - (after GitIgnore filtering) - - 'leaf' for directories with no child directories - * Non-directory dir_info: - - 'NA' """ root = os.path.abspath(path) gi = GitIgnore(root) @@ -64,14 +55,14 @@ def tree_dict_make( if rel_dir == ".": rel_dir = "" - # Skip ignored directories (except the root). + # Skip ignored directories (except the root) if rel_dir != "" and gi.check(rel_dir) == "Ignore": dirnames[:] = [] continue # Filter child directories by GitIgnore so dir_info reflects # only directories we will actually traverse. - kept_dirnames: list[str] = [] + kept_dirnames: List[str] = [] for dn in list(dirnames): child_rel = dn if rel_dir == "" else os.path.join(rel_dir, dn) if gi.check(child_rel) == "Ignore": @@ -142,6 +133,7 @@ def tree_dict_make( return tree_dict + def tree_dict_print( tree_dict: Dict[str, Dict[str, Any]] ) -> None: @@ -153,7 +145,7 @@ def tree_dict_print( Only the values are printed in each column (no 'field=' prefixes). mtime is formatted via the Z module for human readability. """ - entries: list[tuple[str, str, str, str, str]] = [] + entries: List[tuple[str, str, str, str, str]] = [] has_checksum = False for rel_path in sorted(tree_dict.keys()): @@ -183,7 +175,7 @@ def tree_dict_print( ,display_path )) - # Compute column widths (values only) + # Compute column widths type_w = 0 dir_w = 0 mtime_w = 0 @@ -193,7 +185,7 @@ def tree_dict_print( if len(type_val) > type_w: type_w = len(type_val) if len(dir_val) > dir_w: - dir_w = len(dir_w) if False else len(dir_val) # keep RT style simple + dir_w = len(dir_val) if len(mtime_val) > mtime_w: mtime_w = len(mtime_val) if has_checksum and len(checksum_val) > checksum_w: @@ -202,7 +194,6 @@ def tree_dict_print( print("Tree dictionary contents:") for type_val, dir_val, mtime_val, checksum_val, display_path in entries: line = " " - line += type_val.ljust(type_w) line += " " line += dir_val.ljust(dir_w) @@ -230,13 +221,6 @@ def tree_dict_A_minus_B( That is, return a new tree_dict containing only those entries whose keys are present in A but NOT present in B. - - Notes: - * We treat the key sets as abstract sets; values are copied from A. - * A and B are assumed to have relative-path keys built from - independent scans (possibly with different GitIgnore instances). - The contract that the underlying filesystem trees do not overlap - is irrelevant here because we compare only the relative keys. """ result: Dict[str, Dict[str, Any]] = {} @@ -251,6 +235,7 @@ def tree_dict_A_minus_B( return result + def tree_dict_in_between_and_below( A: Dict[str, Dict[str, Any]] ,B: Dict[str, Dict[str, Any]] @@ -296,12 +281,10 @@ def tree_dict_in_between_and_below( if info.get("node_type") == "directory" ) - # 2. Compute leaf directories in A: - # leaf_dir = directory key with no other directory key as a proper descendant + # 2. Compute leaf directories in A leaf_dirs: Set[str] = set() for d in A_dir_keys: - # The root "" is never treated as a leaf; it always has descendants if d == "": continue @@ -322,24 +305,22 @@ def tree_dict_in_between_and_below( below: Dict[str, Dict[str, Any]] = {} for key, info in B.items(): - # Skip B's root; it is typically not useful in this partitioning. + # Skip B's root if key in ("", "."): continue parts = key.split(os.sep) - # Build directory ancestor chain for this B node. - # If the node itself is a directory, include its full path; - # otherwise include directories up to its parent. + # Build directory ancestor chain node_is_dir = (info.get("node_type") == "directory") ancestors: List[str] = [""] prefix = None if node_is_dir: - upto = parts # include the last component + upto = parts else: - upto = parts[:-1] # only directories above the basename + upto = parts[:-1] for part in upto: if prefix is None: @@ -364,7 +345,10 @@ def tree_dict_in_between_and_below( in_between[key] = info if meta.debug_has("tree_dict_in_between_and_below"): - tree_dict_print(result) + merged: Dict[str, Dict[str, Any]] = {} + merged.update(in_between) + merged.update(below) + tree_dict_print(merged) return in_between, below @@ -384,8 +368,7 @@ def tree_dict_newer( then k is included in the result with value B[k]. - Keys that are only in B (not in A) are ignored here; use a separate - set-difference function for "only-in-B" detection. + Keys that are only in B (not in A) are ignored here. """ result: Dict[str, Dict[str, Any]] = {} @@ -408,9 +391,10 @@ def tree_dict_newer( return result + def tree_dict_older( - A: Dict[str, Dict[str, Any]] - ,B: Dict[str, Dict[str, Any]] + A: Dict[str, Dict[str, Dict[str, Any]]] + ,B: Dict[str, Dict[str, Dict[str, Any]]] ) -> Dict[str, Dict[str, Any]]: """ Return a dictionary of nodes from B that are older than their @@ -447,3 +431,56 @@ def tree_dict_older( return result +def in_between_newer( + A: TreeDict + ,B: TreeDict +) -> TreeDict: + """ + in_between_newer(A, B) -> TreeDict + + Return the subset of B's nodes that: + + 1. Are in the 'in_between' region with respect to A's topology, and + 2. Are "newer" than A at the same path, or absent from A. + + Only file nodes in B are considered. + """ + in_between, _below = tree_dict_in_between_and_below(A, B) + + result: TreeDict = {} + + for path, b_info in in_between.items(): + b_type = b_info.get("node_type") + + # Only consider files for "newer" semantics + if b_type != "file": + continue + + b_mtime = b_info.get("mtime") + a_info = A.get(path) + + # Case 1: path not in A + if a_info is None: + result[path] = b_info + continue + + a_type = a_info.get("node_type") + + # Case 2: A has non-file, B has file + if a_type != "file": + result[path] = b_info + continue + + # Case 3: both files, compare mtime + a_mtime = a_info.get("mtime") + if ( + isinstance(a_mtime, (int, float)) + and isinstance(b_mtime, (int, float)) + and b_mtime > a_mtime + ): + result[path] = b_info + + if meta.debug_has("in_between_newer"): + tree_dict_print(result) + + return result