From: Thomas Walker Lynch Date: Thu, 20 Nov 2025 08:11:41 +0000 (+0000) Subject: . X-Git-Url: https://git.reasoningtechnology.com/style/static/git-favicon.png?a=commitdiff_plain;h=ccf0def4a757340e42d53a140c9561a8844b8cf2;p=Harmony.git . --- diff --git a/tool/source_skeleton_compare/check b/tool/source_skeleton_compare/check deleted file mode 120000 index 45a8ec1..0000000 --- a/tool/source_skeleton_compare/check +++ /dev/null @@ -1 +0,0 @@ -CLI.py \ No newline at end of file diff --git a/tool/source_skeleton_compare/command.py b/tool/source_skeleton_compare/command.py index 155340a..1a899b0 100644 --- a/tool/source_skeleton_compare/command.py +++ b/tool/source_skeleton_compare/command.py @@ -105,6 +105,89 @@ def cmd_structure( # ---------------------------------------------------------------------- # import: B -> A (mkdir, cp, and "other" list), using in_between_newer # ---------------------------------------------------------------------- +def build_import_commands( + A_tree: TreeDict, + B_tree: TreeDict, + A_root: str, + B_root: str +) -> Tuple[List[str], List[str], List[str]]: + """ + NEW IMPORT SEMANTICS: + • Directory nodes in B_tree: + - If missing in A: emit `cp -a B_dir A_parent/` + - If exists: skip + • File nodes: + - If missing in A: cp --parents -a + - If both files & B newer: cp --parents -a + - If type mismatch: other_list + """ + + cp_cmds: List[str] = [] + other_list: List[str] = [] + + for rel_path, b_info in B_tree.items(): + b_type = b_info.get("node_type") + rel_display = rel_path if rel_path else "." + + A_info = A_tree.get(rel_path) + A_type = A_info.get("node_type") if A_info is not None else "MISSING" + + # Anything not file or directory + if b_type not in ("file", "directory"): + other_list.append(f"{rel_display}: A={A_type}, B={b_type}") + continue + + # DIRECTORY CASE + if b_type == "directory": + A_path = Path(A_root) / rel_path + B_path = Path(B_root) / rel_path + + if A_info is None: + # Directory missing in A → proper directory import + parent = A_path.parent + cp_cmds.append( + f"cp -a {shell_quote(str(B_path))} {shell_quote(str(parent))}/" + ) + else: + # Exists but should be directory + if A_type != "directory": + other_list.append( + f"{rel_display}: A={A_type}, B=directory" + ) + continue + + # FILE CASE + if b_type == "file": + B_path = os.path.join(B_root, rel_path) + A_path = os.path.join(A_root, rel_path) + + # Missing in A → simple copy + if A_info is None: + cp_cmds.append( + f"cp --parents -a {shell_quote(B_path)} {shell_quote(A_root)}/" + ) + continue + + # Exists, but wrong type + if A_type != "file": + other_list.append( + f"{rel_display}: A={A_type}, B=file" + ) + continue + + # Compare mtimes + a_m = A_info.get("mtime") + b_m = b_info.get("mtime") + + if isinstance(a_m, (int, float)) and isinstance(b_m, (int, float)): + if b_m > a_m: + cp_cmds.append( + f"cp --parents -a {shell_quote(B_path)} {shell_quote(A_root)}/" + ) + + # No mkdir list in this new design — we removed it. + return [], cp_cmds, other_list + def build_import_commands( A_tree: TreeDict ,B_tree: TreeDict @@ -159,9 +242,13 @@ def build_import_commands( # Case 2: B directory if b_type == "directory": if a_info is None: - # Missing in A: safe to mkdir -p - target_dir = os.path.join(A_root, rel_path) if rel_path else A_root - mkdir_cmds.append(f"mkdir -p {shell_quote(target_dir)}") + # Missing in A: copy the directory recursively. + src = os.path.join(B_root, rel_path) if rel_path else B_root + # The destination should be A_root, as cp -a B_dir A_root/ will copy B_dir into A_root + dst = A_root + mkdir_cmds.append( + f"cp -a {shell_quote(src)} {shell_quote(dst)}/" + ) else: # Exists in A: must also be a directory to be "structurally OK" if a_type != "directory": @@ -176,9 +263,9 @@ def build_import_commands( if a_info is None: # B-only file src = os.path.join(B_root, rel_path) if rel_path else B_root - dst = A_root + dst = os.path.join(A_root, rel_path) if rel_path else A_root cp_cmds.append( - f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" + f"cp -a {shell_quote(src)} {shell_quote(dst)}" ) continue @@ -197,14 +284,13 @@ def build_import_commands( if isinstance(a_mtime, (int, float)) and isinstance(b_mtime, (int, float)): if b_mtime > a_mtime: src = os.path.join(B_root, rel_path) if rel_path else B_root - dst = A_root + dst = os.path.join(A_root, rel_path) if rel_path else A_root cp_cmds.append( - f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" + f"cp -a {shell_quote(src)} {shell_quote(dst)}" ) return mkdir_cmds, cp_cmds, other_list - def cmd_import( A_tree: TreeDict ,B_tree: TreeDict @@ -230,7 +316,7 @@ def cmd_import( print("# Only considering in-between files that are new or absent in A.") print("#") - print("# Directories to create in A (mkdir -p):") + print("# Directories to copy from B -> A (cp -a):") if mkdir_cmds: for line in mkdir_cmds: print(line) @@ -238,7 +324,7 @@ def cmd_import( print("# (none)") print("#") - print("# Files to copy from B -> A (cp --parents -a):") + print("# Files to copy from B -> A (cp -a):") if cp_cmds: for line in cp_cmds: print(line) diff --git a/tool/source_skeleton_compare/skeleton.py b/tool/source_skeleton_compare/skeleton.py index 4f51d48..ae272f9 100644 --- a/tool/source_skeleton_compare/skeleton.py +++ b/tool/source_skeleton_compare/skeleton.py @@ -17,29 +17,6 @@ import Harmony TreeDict = Dict[str, Dict[str, Any]] -# tree_dict_make / tree_dict_print -# -# Build a dictionary describing a project tree, respecting GitIgnore. -# -# tree_dict_make(, ) -> tree_dict -# -# () -> bignum | None -# -# Keys of tree_dict: -# - Relative paths from ; the root itself is stored under "". -# -# Values are dicts with: -# 1. 'mtime' : last modification time (float seconds) -# 2. 'node_type' : 'file', 'directory', or 'other' -# 3. 'dir_info' : 'NA', 'leaf', 'branch', or 'root' -# 4. 'checksum' : present only for file nodes when checksum_fn is -# not None -# -# Traversal: -# - Any path (directory or file) for which GitIgnore.check() -# returns 'Ignore' is omitted from the tree_dict. -TreeDict = Dict[str, Dict[str, Any]] - # tree_dict_make / tree_dict_print # # Build a dictionary describing a project tree, respecting GitIgnore. @@ -490,7 +467,6 @@ def tree_dict_older( return result - def in_between_newer( A: TreeDict ,B: TreeDict @@ -527,25 +503,51 @@ def in_between_newer( result: TreeDict = {} - for path, b_info in in_between.items(): + # Keep track of directories already included in the result + included_dirs: Set[str] = set() + + # Sort keys to ensure parent directories are processed before their children. + # This is crucial for the child exclusion logic to work correctly. + sorted_paths = sorted(in_between.keys(), key=len) + + for path in sorted_paths: + b_info = in_between[path] b_type = b_info.get("node_type") + # Constrained nodes: always surface so the caller can list them + # under "not handled automatically". + # Check if this path is a child of an already included directory + is_child_of_included_dir = False + for d in included_dirs: + if path.startswith(d + os.sep): + is_child_of_included_dir = True + break + + if is_child_of_included_dir: + continue + # Constrained nodes: always surface so the caller can list them # under "not handled automatically". if b_type == "constrained": result[path] = b_info + if b_type == "directory": + included_dirs.add(path) continue - # We only do "newer" semantics for regular files. - if b_type != "file": - continue + b_mtime = b_info.get("mtime") a_info = A.get(path) - # Case 1: path not in A at all -> include (new file in in-between) + # Case 1: path not in A at all -> include (new file/dir in in-between) if a_info is None: result[path] = b_info + if b_type == "directory": + included_dirs.add(path) + continue + + # We only do "newer" semantics for regular files. + if b_type != "file": continue a_type = a_info.get("node_type") @@ -553,6 +555,8 @@ def in_between_newer( # Case 2: A has non-file, B has file -> include if a_type != "file": result[path] = b_info + # Note: b_type must be "file" here due to the check above, so no need + # to check for directory inclusion. continue # Case 3: both are files; compare mtime @@ -563,8 +567,10 @@ def in_between_newer( and b_mtime > a_mtime ): result[path] = b_info + # Note: b_type must be "file" here, so no need to check for directory inclusion. if meta.debug_has("in_between_newer"): tree_dict_print(result) return result +