From 395800bee8918a61cd38dfdf8cae66a9a68ce7b8 Mon Sep 17 00:00:00 2001 From: Thomas Walker Lynch Date: Tue, 18 Nov 2025 12:46:08 +0000 Subject: [PATCH] . --- document/check_algorithm.org | 2 +- document/check_algorithm_by_Caldrin.org | 238 ++++++++ tool/{ => deprecated}/skeleton_check | 91 ++-- tool/deprecated/skeleton_commands.py | 249 +++++++++ tool/deprecated/skeleton_compare.py | 76 +++ tool/deprecated/skeleton_config.py | 32 ++ tool/deprecated/skeleton_diff_docs.py | 298 ++++++++++ tool/deprecated/skeleton_paths.py | 170 ++++++ tool/skeleton/CLI.py | 177 ++++++ tool/skeleton/GitIgnore.py | 172 ++++++ tool/skeleton/check | 1 + tool/skeleton/doc.py | 107 ++++ tool/skeleton/load_command_module.py | 103 ++++ tool/skeleton/meta.py | 80 +++ tool/skeleton/skeleton.py | 133 +++++ tool/skeleton_diff_core.py | 696 ------------------------ tool/skeleton_diff_docs.py | 232 -------- 17 files changed, 1868 insertions(+), 989 deletions(-) create mode 100644 document/check_algorithm_by_Caldrin.org rename tool/{ => deprecated}/skeleton_check (74%) create mode 100644 tool/deprecated/skeleton_commands.py create mode 100644 tool/deprecated/skeleton_compare.py create mode 100644 tool/deprecated/skeleton_config.py create mode 100644 tool/deprecated/skeleton_diff_docs.py create mode 100644 tool/deprecated/skeleton_paths.py create mode 100755 tool/skeleton/CLI.py create mode 100755 tool/skeleton/GitIgnore.py create mode 120000 tool/skeleton/check create mode 100644 tool/skeleton/doc.py create mode 100644 tool/skeleton/load_command_module.py create mode 100644 tool/skeleton/meta.py create mode 100755 tool/skeleton/skeleton.py delete mode 100644 tool/skeleton_diff_core.py delete mode 100644 tool/skeleton_diff_docs.py diff --git a/document/check_algorithm.org b/document/check_algorithm.org index d9d4f4e..f35f1bf 100644 --- a/document/check_algorithm.org +++ b/document/check_algorithm.org @@ -12,7 +12,7 @@ Initially the stack is empty. 
Said class has a method called `check` that returns either 'Accept' or 'Ignore'. -When traversing a project tree, typically Harmony, or , the traversing function will send each (, , ) triple to the `check` method of the git ignore class instance. For our program we have one such instance. +When traversing a project tree, typically Harmony, or , the traversing function will send each (, , ) triple to the `check` method of the git ignore class instance. For our program we have one such instance. The `check` method: 1) if the discern stack i snot empty, `check` calls the discern functions on the ignore function stack, if any of them returns 'Ignore' the `check` method immediately returns 'Ignore'. 2) if all `discern` function returns `Accept`, and the node name is '.gitignore', then: 2.1 the top of the discern stack path is checked and compared to the path to the node given to `check`. If the path is the same, then we have a strange error, we have seen two files called .gitignore in the same directory. I guess we go buy a lottery ticket. diff --git a/document/check_algorithm_by_Caldrin.org b/document/check_algorithm_by_Caldrin.org new file mode 100644 index 0000000..172b7b2 --- /dev/null +++ b/document/check_algorithm_by_Caldrin.org @@ -0,0 +1,238 @@ +skeleton_diff — Harmony skeleton vs spec +=============================================== + +0. Context +0.1 Harmony is the authoritative skeleton. +0.2 Each Harmony derived project (“”) starts as a clone of Harmony. +0.3 Over time: + 0.3.1 Harmony evolves. + 0.3.2 Individual projects evolve. + 0.3.3 Some improvements flow back to Harmony, some do not. +0.4 The goal is to detect and reason about drift between Harmony and + using a clear, explicit model of: + 0.4.1 The Harmony skeleton (authoritative source of structure). + 0.4.2 The tree (authoritative source of what actually exists). + 0.4.3 Git ignore semantics (even if approximated at first). + 0.4.4 Node level information (type, mtime, leaf status, checksum). + +1. 
GitIgnoreDiscern class +1.1 A “discern” function + 1.1.1 Signature: discern(project_root, rel_path, info) -> "Accept" | "Ignore". + 1.1.2 rel_path is a Path relative to project_root. + 1.1.3 info is node information (see section 2.3). +1.2 GitIgnoreDiscern instance + 1.2.1 Holds a “discern function stack”. + 1.2.2 Each entry in the stack is a triple: + (discern_function, project_root_path, rel_dir_path_of_gitignore) + 1.2.3 The stack is initialized empty. +1.3 check method + 1.3.1 Input: (project_root, rel_path, info). + 1.3.2 Behavior: + 1.3.2.1 If the discern stack is not empty: + - Iterate from top to bottom (or bottom to top, but chosen and + documented) calling each discern function. + - If any discern returns "Ignore", then check returns "Ignore". + 1.3.2.2 If all discern functions return "Accept", then: + - If the node name is ".gitignore", we handle it specially + (see 1.4). + - Otherwise, return "Accept". +1.4 Handling ".gitignore" + 1.4.1 When check sees a node whose name is ".gitignore": + 1.4.1.1 The path of the directory that contains this .gitignore is rel_dir. + 1.4.1.2 For now we do not attempt to detect “two .gitignore files in the + same directory”; that would indicate a malformed filesystem. + 1.4.1.3 We call parse_gitignore(project_root, rel_dir) which returns a + discern function. + 1.4.1.4 We push (discern_function, project_root, rel_dir) onto the + discern function stack. + 1.4.1.5 check returns "Accept" for the .gitignore file itself. +1.5 pop method + 1.5.1 The tree traversal logic must call a pop method whenever it pops + back up from a directory that had a .gitignore. + 1.5.2 pop removes the top triple from the discern function stack. +1.6 parse_gitignore approximation + 1.6.1 For now (heuristic phase): + 1.6.1.1 If rel_dir is empty (we are at the project root): + - Return a discern function that always returns "Accept". 
+ 1.6.1.2 If rel_dir is not empty: + - Return a discern function that always returns "Ignore" for + any node under that directory (except the .gitignore itself). + 1.6.2 Future work (TODO): + 1.6.2.1 Properly parse .gitignore content. + 1.6.2.2 Build discern functions that implement the documented + semantics of .gitignore patterns. + +2. Harmony dictionary (“skeleton dictionary”) +2.1 Traversal + 2.1.1 Traverse the entire Harmony tree. + 2.1.2 This is the tree that contains the tool directory skeleton_check is + running from, or equivalently the tree rooted at $REPO_HOME. + 2.1.3 The traversal: + 2.1.3.1 Maintains a GitIgnoreDiscern instance. + 2.1.3.2 For each node encountered: + - Build rel_path, the path relative to Harmony root. + - Build info (see 2.3). + - Call gitignore.check(HARMONY_ROOT, rel_path, info). + - If "Ignore", skip the node (and if it is a dir, do not descend). + - If "Accept", proceed. +2.2 Skeleton dictionary + 2.2.1 For each accepted node: + - Use rel_path as a dictionary key. + - Store a NodeInfo object as the value. + 2.2.2 All rel_paths in this dictionary are part of the “skeleton”. +2.3 Node information (“information”) + 2.3.1 Fields: + 2.3.1.1 path_type: "file" | "dir" | "other". + 2.3.1.2 mtime: numeric modification timestamp. + 2.3.1.3 is_leaf_flag: bool, true if the node is a leaf in the tree. + 2.3.1.4 checksum: optional checksum (string or bytes) when enabled. + 2.3.2 Leaf definition: + 2.3.2.1 For directories: a dir is a leaf if there are no child nodes + recorded beneath it (no children in the dictionary). + 2.3.2.2 For files and “other”: they are trivially leaves as far as + the tree structure is concerned. +2.4 Checksum mode + 2.4.1 If the “checksum” command has been given: + 2.4.1.1 NodeInfo includes a checksum of file contents. + 2.4.1.2 For non files, checksum may be None. + 2.4.2 Computing checksums is optional and may be expensive. + +3. 
<other> traversal and ignored set +3.1 Traversal of <other> + 3.1.1 We traverse the <other> tree (project root = other_root). + 3.1.2 We use a *separate* GitIgnoreDiscern instance for <other>. + 3.1.3 For each node: + 3.1.3.1 Compute rel_path relative to other_root. + 3.1.3.2 Compute NodeInfo for that node. + 3.1.3.3 Call gitignore.check(other_root, rel_path, info). +3.2 Ignored set + 3.2.1 If gitignore.check says "Ignore": + 3.2.1.1 Do not descend into that node if it is a directory. + 3.2.1.2 Add rel_path to the “ignored” set for <other>. + 3.2.2 If gitignore.check says "Accept": + 3.2.2.1 Do descend into directories. + 3.2.2.2 Do *not* add this rel_path to the “ignored” set. +3.3 Skeleton lookup while traversing <other> + 3.3.1 For each accepted node: + 3.3.1.1 Look up rel_path in the skeleton dictionary. + 3.3.1.2 Cases: + a) Found in skeleton_dict: + - skeleton_info = skeleton_dict[rel_path]. + - If skeleton_info.is_leaf_flag is True and the + node is a directory, then: + * This directory corresponds to a skeleton leaf. + * Do not descend further in the *outer* traversal. + * Let the addendum descender handle any deeper nodes. + b) Not found in skeleton_dict: + - Add rel_path to the “addendum seed” list (see 4.1.3). + - Descend normally; deeper nodes are also addendum candidates. + +4. Comparing skeleton_dict and <other> +4.1 Lists to construct + 4.1.1 missing_list + - Paths that exist in skeleton_dict but do not exist in <other> + (and are not in the ignored set for <other>). + 4.1.2 present_but_ignored_list + - Paths that exist in skeleton_dict, and whose rel_path appears + in the ignored set. + 4.1.3 addendum_list + - Paths that exist in <other> but not in skeleton_dict. + - This may be built by: + a) While traversing <other>, for any accepted node whose + rel_path is not in skeleton_dict, add it to addendum_list. 
+ b) Additionally, when we find an <other> directory node that + corresponds to a skeleton leaf directory, an “addendum + descender” may be called starting from that node to gather + all project specific material under that subtree into + addendum_list. + 4.1.4 newer_list + - For nodes where rel_path exists in both skeleton_dict and + <other>, and both are files: + - If mtime(other) > mtime(skeleton), add rel_path to newer_list. + 4.1.5 older_list + - As above, but mtime(other) < mtime(skeleton). + 4.1.6 different_list (checksum mode) + - If checksums are available and mtime(other) == mtime(skeleton), + but checksum(other) != checksum(skeleton), add rel_path to + different_list. + +4.2 Detailed skeleton dictionary scan + 4.2.1 For each entry in skeleton_dict: + 4.2.1.1 Let rel_path be the key, skeleton_info the value. + 4.2.1.2 If rel_path is in ignored set: + - Add rel_path to present_but_ignored_list. + - Continue to next skeleton entry. + 4.2.1.3 Construct other_path = other_root / rel_path and see if it exists. + a) If other_path does not exist: + - Add rel_path to missing_list. + - Continue. + b) If skeleton_info.is_leaf_flag is True and the + other_path is a directory: + - This is a subtree under a skeleton leaf. + - Call the addendum descender on other_path to + gather project specific additions. + - Continue after that. + c) If both sides are files: + - Compare mtimes and optional checksums and update + newer_list, older_list, different_list. + +5. Commands and reports +5.1 Commands correspond to reports or combined analyses: + 5.1.1 environment + - Print environment, including REPO_HOME and useful Harmony variables. + 5.1.2 structure + - Use skeleton_dict and <other> to report missing directories and + structural drift. + - Interpret “between” vs “below” using skeleton leaf boundaries: + - Missing nodes under skeleton directories indicate incomplete + adoption in <other>. + - Extra nodes under skeleton leaf directories indicate “below” + project additions. 
+ 5.1.3 import + - Use newer_list. + - Print cp commands to copy from <other> into Harmony. + - Do not execute the commands. + 5.1.4 export + - Use older_list. + - Print cp commands to copy from Harmony into <other>. + - Do not execute the commands. + 5.1.5 suspicious + - Use addendum_list plus knowledge of expected zones: + - Ignore developer/, tester/, release/ (user owned). + - Ignore tool/ and tool_shared/ (tool zones). + - Anything else that is not in skeleton_dict and not in + user owned or tool zones is suspicious. + 5.1.6 addendum + - Report project specific additions under tool/ and tool_shared/ + that are not in skeleton_dict. + - Essentially a filtered view of addendum_list. + 5.1.7 checksum + - Enable checksum mode and produce different_list in addition to + newer_list and older_list. + - May be a standalone command, or a modifier to import/export. + 5.1.8 present_but_ignored + - Report skeleton paths that exist in Harmony but are ignored by + <other>’s gitignore semantics. + 5.1.9 all + - Run the complete suite: environment, structure, import, export, + suspicious, addendum, present_but_ignored, checksum (if enabled). +5.2 “files” is not a user facing command + 5.2.1 The “files” step is internal: it is the comparison over + skeleton_dict and <other> that builds newer_list, older_list, + different_list. + 5.2.2 import/export use its results. + +6. Safety guarantees +6.1 No writes + 6.1.1 The program does not modify files. + 6.1.2 It only prints suggested commands and reports. +6.2 Time based comparison + 6.2.1 Newer vs older is based on mtime. If clocks are misleading, + results need manual judgment. +6.3 Git integration + 6.3.1 The tool assumes Harmony is under git so old versions exist even + after cp operations. +6.4 Exit status + 6.4.1 0 on success (even if differences are found). + 6.4.2 Non zero on argument errors or internal failures (e.g. invalid + project path). 
diff --git a/tool/skeleton_check b/tool/deprecated/skeleton_check similarity index 74% rename from tool/skeleton_check rename to tool/deprecated/skeleton_check index cc781fe..7051acb 100755 --- a/tool/skeleton_check +++ b/tool/deprecated/skeleton_check @@ -1,41 +1,34 @@ #!/usr/bin/env python3 # -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- -""" -skeleton_check — CLI entry point for the Harmony Skeleton Auditor - -This script wires CLI argument parsing to: - - skeleton_diff_core (core logic) - - skeleton_diff_docs (usage / help) """ - -from __future__ import annotations +skeleton_check — CLI front-end for skeleton_diff +""" import sys from pathlib import Path -from typing import List, Optional +from typing import List -from skeleton_diff_core import ( +from skeleton_commands import ( work_environment, work_structure, - work_age, work_import, work_export, - work_addendum, work_suspicious, + work_addendum, work_version, -) -from skeleton_diff_docs import ( - work_help, work_usage, ) +from skeleton_diff_docs import work_help def CLI() -> int: args_list = sys.argv[1:] + program_name = Path(sys.argv[0]).name or "skeleton_check" + if not args_list: - work_usage() + work_usage(program_name) return 1 # 1. 
Global dominating commands: usage, help, version @@ -48,7 +41,7 @@ def CLI() -> int: for token in args_list: if token in global_dom_commands_set: if token == "usage": - work_usage() + work_usage(program_name) elif token == "help": work_help() elif token == "version": @@ -62,7 +55,6 @@ def CLI() -> int: commands_require_other_set = { "structure", - "age", "import", "export", "suspicious", @@ -73,9 +65,9 @@ def CLI() -> int: all_commands_set = commands_no_other_set | commands_require_other_set commands_list: List[str] = [] - other_root_path: Optional[Path] = None + other_root_path: Path | None = None project_needed_flag = False - earliest_requires_index: Optional[int] = None + earliest_requires_index: int | None = None n_args = len(args_list) last_index = n_args - 1 @@ -84,11 +76,7 @@ def CLI() -> int: if token in all_commands_set: # If we already saw a project-requiring command earlier, and this is # the last token, interpret it as the project path instead of a command. - if ( - project_needed_flag - and index == last_index - and earliest_requires_index is not None - ): + if project_needed_flag and index == last_index and earliest_requires_index is not None: other_root_path = Path(token) break @@ -101,39 +89,37 @@ def CLI() -> int: else: # Not a known command: may be the project path, but only if a command # that requires a project has already been seen and this is the last arg. - if ( - project_needed_flag - and index == last_index - and earliest_requires_index is not None - ): + if project_needed_flag and index == last_index and earliest_requires_index is not None: other_root_path = Path(token) break print(f"ERROR: unknown command '{token}'.", file=sys.stderr) - work_usage() + work_usage(program_name) return 1 # 3. Post-parse checks if project_needed_flag: + # We saw at least one command that requires . if other_root_path is None: + # First project-requiring command was also the last argument: no project path left. 
last_command = commands_list[-1] if commands_list else "" print( f"ERROR: missing after command '{last_command}'.", file=sys.stderr, ) - work_usage() + work_usage(program_name) return 1 if not other_root_path.is_dir(): print(f"ERROR: {other_root_path} is not a directory.", file=sys.stderr) - work_usage() + work_usage(program_name) return 1 - # 4. Expand 'all' + # 4. Expand 'all' into its constituent checks expanded_commands_list: List[str] = [] if "all" in commands_list and len(commands_list) > 1: print("ERROR: 'all' cannot be combined with other commands.", file=sys.stderr) - work_usage() + work_usage(program_name) return 1 for command in commands_list: @@ -141,7 +127,6 @@ def CLI() -> int: expanded_commands_list.extend([ "environment", "structure", - "age", "import", "export", "suspicious", @@ -153,60 +138,46 @@ def CLI() -> int: commands_list = expanded_commands_list # 5. Execute commands - other_root: Optional[Path] = other_root_path + other_root: Path | None = other_root_path for command in commands_list: - print(f"\n--- Running: {command} ---") - + print(f"\\n--- Running: {command} ---") if command == "environment": work_environment() - elif command == "structure": if other_root is None: print("ERROR: 'structure' requires .", file=sys.stderr) - work_usage() + work_usage(program_name) return 1 work_structure(other_root) - - elif command == "age": - if other_root is None: - print("ERROR: 'age' requires .", file=sys.stderr) - work_usage() - return 1 - work_age(other_root, checksum_flag=False) - elif command == "import": if other_root is None: print("ERROR: 'import' requires .", file=sys.stderr) - work_usage() + work_usage(program_name) return 1 work_import(other_root) - elif command == "export": if other_root is None: print("ERROR: 'export' requires .", file=sys.stderr) - work_usage() + work_usage(program_name) return 1 work_export(other_root) - elif command == "suspicious": if other_root is None: print("ERROR: 'suspicious' requires .", file=sys.stderr) - 
work_usage() + work_usage(program_name) return 1 - work_suspicious(other_root, checksum_flag=False) - + work_suspicious(other_root) elif command == "addendum": if other_root is None: print("ERROR: 'addendum' requires .", file=sys.stderr) - work_usage() + work_usage(program_name) return 1 work_addendum(other_root) - else: - # Should be unreachable, because we validated commands_list. + # Should be unreachable because we validated commands_list. print(f"Unknown command: {command}") - work_usage() + work_usage(program_name) return 1 return 0 diff --git a/tool/deprecated/skeleton_commands.py b/tool/deprecated/skeleton_commands.py new file mode 100644 index 0000000..32520c0 --- /dev/null +++ b/tool/deprecated/skeleton_commands.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +""" +skeleton_commands — user-facing command implementations for skeleton_diff +""" + +import os +import sys +from pathlib import Path +from typing import List + +from skeleton_config import HARMONY_ROOT, VERSION +from skeleton_paths import index_project +from skeleton_compare import compare_harmony_to_other + + +# ---------------------------------------------------------------------- +# environment +# ---------------------------------------------------------------------- +def work_environment() -> int: + print("=== Environment ===") + print(f"REPO_HOME = {HARMONY_ROOT}") + for key, value in sorted(os.environ.items()): + if key.startswith(("HARMONY_", "REPO_", "PATH")) or "tool" in key.lower(): + print(f"{key} = {value}") + return 0 + + +# ---------------------------------------------------------------------- +# structure +# ---------------------------------------------------------------------- +def work_structure(other_root: Path) -> int: + print("=== Structure Comparison ===") + + results = compare_harmony_to_other(other_root) + + # Only consider directories for structural reports + skeleton_dir_list = [ + rel for rel, info in results.skeleton_dict.items() + if info.path_type == "dir" + ] + 
other_dir_list = [ + rel for rel, info in results.other_dict.items() + if info.path_type == "dir" + ] + + skeleton_dir_set = set(skeleton_dir_list) + other_dir_set = set(other_dir_list) + + missing_dir_list = sorted( + skeleton_dir_set - other_dir_set, + key=lambda p: (len(p.parts), str(p)), + ) + + new_dir_list = sorted( + other_dir_set - skeleton_dir_set, + key=lambda p: (len(p.parts), str(p)), + ) + + if missing_dir_list: + print("Missing Harmony directories in :") + for rel_dir in missing_dir_list: + print(f" [MISSING] {rel_dir}") + print() + + if new_dir_list: + print("New directories present only in :") + for rel_dir in new_dir_list: + print(f" [NEW] {rel_dir}") + print() + + if not missing_dir_list and not new_dir_list: + print("No structural directory differences detected.") + + return 0 + + +# ---------------------------------------------------------------------- +# import / export +# ---------------------------------------------------------------------- +def work_import(other_root: Path) -> int: + print("=== Import Commands (newer → Harmony) ===") + + results = compare_harmony_to_other(other_root) + newer_list = results.newer_list + + if not newer_list: + print(" No newer files in to import.") + return 0 + + for rel_path in newer_list: + src = other_root / rel_path + dst = HARMONY_ROOT / rel_path + print(f"cp {src} {dst} # clobbers older Harmony file") + + return 0 + + +def work_export(other_root: Path) -> int: + print("=== Export Commands (Harmony → ) ===") + + results = compare_harmony_to_other(other_root) + older_list = results.older_list + + if not older_list: + print(" No stale files in to export.") + return 0 + + for rel_path in older_list: + src = HARMONY_ROOT / rel_path + dst = other_root / rel_path + print(f"cp {src} {dst} # clobbers stale file in ") + + return 0 + + +# ---------------------------------------------------------------------- +# suspicious +# ---------------------------------------------------------------------- +USER_OWNED_TOP = { 
+ "developer", + "tester", + "release", +} +TOOL_TOP = { + "tool", + "tool_shared", +} + + +def work_suspicious(other_root: Path) -> int: + print("=== Suspicious Files (clutter outside expected zones) ===") + + results = compare_harmony_to_other(other_root) + skeleton_dict = results.skeleton_dict + other_dict = results.other_dict + + suspicious_list: List[Path] = [] + + for rel_path, other_info in other_dict.items(): + if other_info.path_type != "file": + continue + + if rel_path == Path("."): + continue + + top_component = rel_path.parts[0] + + # Skip user-owned zones + if top_component in USER_OWNED_TOP: + continue + + # Skip tool zones + if top_component in TOOL_TOP: + continue + + # If Harmony knows about this file, it is not suspicious. + if rel_path in skeleton_dict: + continue + + suspicious_list.append(rel_path) + + suspicious_list.sort(key=lambda p: (len(p.parts), str(p))) + + if suspicious_list: + for rel_path in suspicious_list: + print(f" [SUSPICIOUS] {rel_path}") + else: + print(" None found.") + + return 0 + + +# ---------------------------------------------------------------------- +# addendum +# ---------------------------------------------------------------------- +def work_addendum(other_root: Path) -> int: + print("=== Addendum: New Tools in ===") + + results = compare_harmony_to_other(other_root) + skeleton_dict = results.skeleton_dict + other_dict = results.other_dict + + addendum_list: List[Path] = [] + + for rel_path, other_info in other_dict.items(): + if other_info.path_type != "file": + continue + + if rel_path == Path("."): + continue + + parts = rel_path.parts + if not parts: + continue + + top_component = parts[0] + if top_component not in TOOL_TOP: + continue + + if rel_path not in skeleton_dict: + addendum_list.append(rel_path) + + addendum_list.sort(key=lambda p: (len(p.parts), str(p))) + + if addendum_list: + for rel_path in addendum_list: + print(f" [ADDENDUM] {rel_path}") + else: + print(" None found.") + + return 0 + + +# 
---------------------------------------------------------------------- +# version / usage / help (help text lives in skeleton_diff_docs) +# ---------------------------------------------------------------------- +def work_version() -> int: + print(f"skeleton_diff version {VERSION}") + return 0 + + +def work_usage(program_name: str) -> int: + print(f"Usage: {program_name} []... []") + print() + print(" is required if any commands are specified that") + print("require a project to analyze.") + print() + print("Commands:") + print(" version Show program version (Major.Minor)") + print(" help Long-form documentation") + print(" usage This short summary") + print(" environment Show key environment variables (including $REPO_HOME)") + print(" structure Compare directory structure") + print(" import Print shell commands for pulling newer skeleton") + print(" files into Harmony") + print(" export Print shell commands for pushing Harmony skeleton") + print(" files into ") + print(' suspicious List "between" files that are not in the skeleton') + print(" addendum List tool files in that do not exist in") + print(" the Harmony skeleton (project-local additions)") + print(" all Run the full set of analyses") + print() + print("Examples:") + print(f" {program_name} usage") + print(f" {program_name} structure import ../subu") + print(f" {program_name} all ../subu") + print() + print(f"Run '{program_name} help' for detailed explanations.") + return 0 diff --git a/tool/deprecated/skeleton_compare.py b/tool/deprecated/skeleton_compare.py new file mode 100644 index 0000000..ac76673 --- /dev/null +++ b/tool/deprecated/skeleton_compare.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +""" +skeleton_compare — comparison logic between Harmony skeleton and +""" + +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Set + +from skeleton_config import HARMONY_ROOT +from skeleton_paths import NodeInfo, index_project + + +@dataclass +class ComparisonResults: 
+ skeleton_dict: Dict[Path, NodeInfo] + other_dict: Dict[Path, NodeInfo] + missing_list: List[Path] + addendum_list: List[Path] + newer_list: List[Path] + older_list: List[Path] + + +def compare_harmony_to_other(other_root: Path) -> ComparisonResults: + """ + Build Harmony and indexes and compare them. + + - missing_list: paths present in Harmony, absent in . + - addendum_list: paths present in , absent in Harmony. + - newer_list: file paths where is newer than Harmony. + - older_list: file paths where is older than Harmony. + """ + other_root = other_root.resolve() + + skeleton_dict = index_project(HARMONY_ROOT) + other_dict = index_project(other_root) + + skeleton_paths_set: Set[Path] = set(skeleton_dict.keys()) + other_paths_set: Set[Path] = set(other_dict.keys()) + + missing_list: List[Path] = sorted( + skeleton_paths_set - other_paths_set, + key=lambda p: (len(p.parts), str(p)), + ) + + addendum_list: List[Path] = sorted( + other_paths_set - skeleton_paths_set, + key=lambda p: (len(p.parts), str(p)), + ) + + newer_list: List[Path] = [] + older_list: List[Path] = [] + + for rel_path in sorted( + skeleton_paths_set & other_paths_set, + key=lambda p: (len(p.parts), str(p)), + ): + skeleton_info = skeleton_dict[rel_path] + other_info = other_dict[rel_path] + + if skeleton_info.path_type != "file" or other_info.path_type != "file": + continue + + if other_info.mtime > skeleton_info.mtime: + newer_list.append(rel_path) + elif other_info.mtime < skeleton_info.mtime: + older_list.append(rel_path) + + return ComparisonResults( + skeleton_dict=skeleton_dict, + other_dict=other_dict, + missing_list=missing_list, + addendum_list=addendum_list, + newer_list=newer_list, + older_list=older_list, + ) diff --git a/tool/deprecated/skeleton_config.py b/tool/deprecated/skeleton_config.py new file mode 100644 index 0000000..1724b20 --- /dev/null +++ b/tool/deprecated/skeleton_config.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +""" +skeleton_config — shared configuration for 
skeleton_diff + +Version: Major.Minor = 0.6 +Author: Thomas Walker Lynch, with Grok and Vaelorin +Date: 2025-11-18 +""" + +import os +import sys +from pathlib import Path + +MAJOR = 0 +MINOR = 6 +VERSION = f"{MAJOR}.{MINOR}" + + +def _discover_harmony_root() -> Path: + repo_home = os.getenv("REPO_HOME") + if repo_home: + root_path = Path(repo_home).resolve() + else: + # Fallback: assume current working directory is inside Harmony + root_path = Path.cwd().resolve() + if not root_path.exists(): + print("ERROR: $REPO_HOME not set or invalid. Source env_toolsmith.", file=sys.stderr) + sys.exit(1) + return root_path + + +HARMONY_ROOT = _discover_harmony_root() diff --git a/tool/deprecated/skeleton_diff_docs.py b/tool/deprecated/skeleton_diff_docs.py new file mode 100644 index 0000000..3db2d1e --- /dev/null +++ b/tool/deprecated/skeleton_diff_docs.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python3 +""" +skeleton_diff_docs — long-form help text for skeleton_diff +""" + +from pathlib import Path +from skeleton_config import VERSION + + +def work_help() -> int: + help_text = f""" +skeleton_diff — Harmony Skeleton Auditor +======================================== + +Version: {VERSION} + +1. Purpose +1.1 The skeleton_diff tool compares a Harmony project (the skeleton) with + another project () that was originally cloned from Harmony. +1.2 Over time, individual projects tend to evolve: + - Some improvements are made in projects but never pulled back to the + Harmony skeleton. + - Some improvements make it back into Harmony, leaving older projects + with stale copies of skeleton files. + - Extra directories and files appear in projects, some intentional and + some accidental. +1.3 skeleton_diff helps you see that drift clearly so that you can: + - Pull newer tooling back into the skeleton. + - Push newer skeleton files out into projects. + - Spot suspicious clutter and structural misuse of the skeleton. + +2. 
Invocation and Argument Rules +2.1 Basic command line form: + skeleton_diff []... [] +2.2 is required if any of the specified commands + require a project to analyze. +2.3 Commands are parsed from left to right as a list. The final argument + is interpreted as only if: + 2.3.1 At least one command that requires a project appears earlier in + the argument list, and + 2.3.2 There is at least one argument left after that command. +2.4 Dominating commands: + 2.4.1 If any of the following appear anywhere on the command line: + usage, help, version + then that command is executed and all other arguments are + ignored (including other commands and paths). + 2.4.2 This makes: + skeleton_diff usage + skeleton_diff usage . + skeleton_diff version structure ../subu + all behave as simple “usage” or “version” calls. +2.5 Commands that require : + 2.5.1 structure + 2.5.2 import + 2.5.3 export + 2.5.4 suspicious + 2.5.5 addendum + 2.5.6 all (which expands to a sequence of project commands) +2.6 Commands that do not require a project: + 2.6.1 version + 2.6.2 help + 2.6.3 usage + 2.6.4 environment +2.7 Missing project argument: + 2.7.1 If the first command that requires a project is also the last + argument, there is no argument left to serve as + , and skeleton_diff reports an error. + 2.7.2 If a command that requires a project appears before the last + argument, the last argument is interpreted as , even if + its spelling matches a command name. +2.8 Effect of “all”: + 2.8.1 The special command “all” is shorthand for: + environment, structure, import, export, suspicious, addendum + 2.8.2 “all” may not be combined with other commands. If present, it + must be the only non-dominating command on the line. + +3. Environment Expectations +3.1 Before running skeleton_diff you are expected to: + 3.1.1 Be inside a Harmony-derived project. 
+ 3.1.2 Have already run: + source env_toolsmith + which in turn sources: + tool_shared/bespoke/env + 3.1.3 Have $REPO_HOME set to your Harmony project root. +3.2 All paths reported by skeleton_diff are relative to $REPO_HOME unless + otherwise stated. +3.3 The tool does not modify any files. It only reports differences and + prints suggested copy commands for you to run (or edit) manually. + +4. Key Concepts +4.1 Harmony root + 4.1.1 The Harmony skeleton lives at: + $REPO_HOME + 4.1.2 The tool treats $REPO_HOME as the reference layout. +4.2 project root + 4.2.1 The project root is the final argument when a project + is required, and must be a directory. + 4.2.2 It is expected to have been created by cloning the Harmony + skeleton at some point. +4.3 “Between” and “below” paths + 4.3.1 A skeleton directory is any directory that exists in the Harmony + tree, rooted at $REPO_HOME. + 4.3.2 “Between” paths: + - Consider any traversal from the project root down to a known + Harmony skeleton directory (for example, tool/, developer/, + tester/, document/, etc.). + - Any extra directories that appear along those routes in + , but do not exist in the Harmony skeleton, are + “between” paths. + - Example: Harmony has tool/CLI/, but has tool/custom/CLI/. + The inserted custom/ is a “between” directory. + - These often indicate that the developer has modified the + skeleton’s core structural spine. + 4.3.3 “Below” paths: + - Directories that are nested under existing skeleton + directories (for example, tool/custom/new_script.py under + tool/). + - These are usually normal and represent project-specific + organization and additions. + 4.3.4 skeleton_diff reports both: + - “Between” additions, which are more likely to be structural + misuse or deliberate skeleton changes. + - “Below” additions, which are more likely to be healthy + extensions of the skeleton. 
+4.4 User-owned vs shared zones + 4.4.1 Some areas are explicitly owned by a role and are not audited as + suspicious: + - $REPO_HOME/developer + - $REPO_HOME/tester + - $REPO_HOME/release + 4.4.2 Tools are expected under: + - $REPO_HOME/tool + - $REPO_HOME/tool_shared + 4.4.3 Directories that are covered by a .gitignore are treated as + intentionally messy and are not reported as suspicious clutter. + +5. Commands +5.1 version, help, usage (dominating, no project required) + 5.1.1 version + - Prints: + skeleton_diff version . + - Major and Minor are integers, not a decimal number. + 5.1.2 help + - Shows this detailed documentation. + 5.1.3 usage + - Shows a short, to-the-point command summary and examples. + 5.1.4 If any of these appear, skeleton_diff executes that command and + ignores all other arguments. + +5.2 environment (no project required) + 5.2.1 Prints the key environment context that skeleton_diff relies on: + - $REPO_HOME + - Other Harmony-related variables (e.g., HARMONY_* and REPO_*) + - PATH and selected tool-related variables. + 5.2.2 Useful for debugging configuration or confirming that + env_toolsmith has been sourced correctly. + +5.3 structure (requires ) + 5.3.1 Goal: discover structural drift in directories between Harmony + and . + 5.3.2 Step 1: missing Harmony directories in + - Walk the Harmony skeleton to find all directories under + $REPO_HOME. + - For each Harmony directory, skeleton_diff checks whether the + corresponding directory exists at the same relative path in + . + - Any directory that is present in Harmony but missing in + is reported as: + [MISSING] + 5.3.3 Step 2: new directories in that are not in Harmony + - Walk all directories in . + - Any directory that does not appear in Harmony at the same + relative path is considered “new”. + - New directories are classified as “between” or “below” + according to the rules in section 4.3 and reported so you can + see both structural misuse and legitimate extensions. 
+ +5.4 import / export (both require ) + 5.4.1 Internal files step: + - Both import and export depend on a traversal of skeleton and + project files to classify them as missing/newer/older. + - The traversal is performed automatically as needed; there is no + user-visible “files” command. + 5.4.2 import + - Goal: help you pull improvements *from* *into* Harmony. + - Uses the list of files where is newer than Harmony to + generate shell commands of the form: + cp / $REPO_HOME/ + - Commands overwrite the older skeleton file. The old version is + preserved by git history. + 5.4.3 export + - Goal: help you push newer skeleton files *from* Harmony *into* + . + - Uses the list of files where is older than Harmony to + generate shell commands of the form: + cp $REPO_HOME/ / + - Commands overwrite stale files in . + 5.4.4 Both commands only print shell commands; they never execute them. + +5.5 suspicious (requires ) + 5.5.1 Goal: find “clutter” files that look out of place relative to the + Harmony skeleton. + 5.5.2 A file is not considered suspicious if it is: + - Under $REPO_HOME/developer, which is owned by the developer. + - Under $REPO_HOME/tester, which is owned by the tester. + - Under $REPO_HOME/release, which is deliberately customized by + the developer. + - Under $REPO_HOME/tool or $REPO_HOME/tool_shared, where tools + are expected to live (including substructures). + - In a directory that carries a .gitignore file, which signals + that local clutter is intended and version-control rules exist. + - Present in the Harmony skeleton itself at the same relative + path (i.e., an expected file). + 5.5.3 Any other file that appears in but has no corresponding + skeleton file and is not in the roles’ owned zones or .gitignored + directories is reported as: + [SUSPICIOUS] + 5.5.4 These “between” files are candidates to: + - Move under proper tool directories. + - Add to the skeleton. + - Remove from the project. 
+ +5.6 addendum (requires ) + 5.6.1 Goal: find project-specific tools that might be candidates to + promote back into the Harmony skeleton. + 5.6.2 For each tool directory in : + - tool/ + - tool_shared/ + skeleton_diff walks all files under those directories and + compares them to the Harmony tool directories at the same + relative paths. + 5.6.3 Any file that exists in ’s tool or tool_shared directory + but not in the Harmony skeleton is reported as: + [ADDENDUM] + 5.6.4 These represent project-local tooling; you decide whether to: + - Keep them project-specific, or + - Move them into the shared skeleton. + +5.7 all (requires ) + 5.7.1 all is shorthand for running: + environment + structure + import + export + suspicious + addendum + 5.7.2 It may not be combined with other commands. If you need a + different sequence, list the commands explicitly. + +6. Example Workflows +6.1 Inspect a specific project’s drift + 6.1.1 From a Harmony project: + source env_toolsmith + skeleton_diff all ../subu + 6.1.2 Read: + - environment: sanity-check $REPO_HOME and related variables. + - structure: to see missing or extra directories. + - import/export: to copy improvements across. + - suspicious/addendum: to see clutter and candidate tools. +6.2 Import improvements from a project + 6.2.1 Run: + skeleton_diff import ../subu + 6.2.2 Review the printed cp commands, then run them selectively. +6.3 Refresh a stale project from the skeleton + 6.3.1 Run: + skeleton_diff export ../some_project + 6.3.2 Review cp commands, run them, and then commit in . +6.4 Quick environment and doc checks + 6.4.1 Without a project: + skeleton_diff usage + skeleton_diff help + skeleton_diff version + skeleton_diff environment + +7. Safety and Limitations +7.1 No automatic writes + 7.1.1 skeleton_diff never changes files itself. It only prints commands + and reports. +7.2 Time-based comparison + 7.2.1 “Newer” and “older” are based on filesystem modification times. 
+ If clocks or timestamps are misleading, results may need manual + interpretation. +7.3 Directory semantics + 7.3.1 “Between” and “below” classification is a heuristic based on + the current Harmony skeleton. Some edge cases may require human + judgment. +7.4 Git integration + 7.4.1 The tool assumes that Harmony is a git repository and relies on + git history for old versions. It does not attempt to archive or + back up overwritten files. +7.5 Exit Status + 7.5.1 skeleton_diff returns: + - 0 on success (even if differences are found). + - Non-zero if arguments are invalid (e.g., missing project path) + or if a subcommand fails before producing output. +""" + print(help_text.strip()) + return 0 diff --git a/tool/deprecated/skeleton_paths.py b/tool/deprecated/skeleton_paths.py new file mode 100644 index 0000000..521d728 --- /dev/null +++ b/tool/deprecated/skeleton_paths.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +""" +skeleton_paths — filesystem indexing and ignore logic for skeleton_diff +""" + +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Set, List + + +# ---------------------------------------------------------------------- +# Node information +# ---------------------------------------------------------------------- + +@dataclass +class NodeInfo: + path_type: str # "file", "dir", or "other" + mtime: float + is_leaf_flag: bool = False + + +# ---------------------------------------------------------------------- +# Built-in ignore rules and helper +# ---------------------------------------------------------------------- + +def _is_builtin_ignored(rel_path: Path) -> bool: + """ + Cheap ignore filter for common junk: + - .git tree + - __pycache__ and Python bytecode + - notebook / pytest caches + - editor backup files (*~, *.bak) + """ + parts = rel_path.parts + + # Ignore anything under a .git directory + if ".git" in parts: + return True + + name = parts[-1] if parts else "" + + # Directories by name + if name 
in { + "__pycache__", + ".ipynb_checkpoints", + ".pytest_cache", + }: + return True + + # Python bytecode / compiled + lower_name = name.lower() + if lower_name.endswith(".pyc") or lower_name.endswith(".pyo") or lower_name.endswith(".pyd"): + return True + + # Editor backup / temp + if lower_name.endswith("~") or lower_name.endswith(".bak"): + return True + + return False + + +def should_ignore_node(project_root: Path, rel_path: Path) -> bool: + """ + Single "ignore this?" decision point. + + For now we: + - apply built-in patterns, and + - treat any directory that has a .gitignore file as fully ignored + (except for the .gitignore file itself). + + TODO: + - Parse .gitignore files properly and obey their patterns. + """ + if not rel_path.parts: + # Root is never ignored. + return False + + # Built-in patterns + if _is_builtin_ignored(rel_path): + return True + + absolute_path = project_root / rel_path + parent_dir = absolute_path.parent + + # If parent directory has a .gitignore, ignore this node. + # (We do not ignore the .gitignore file itself.) + if (parent_dir / ".gitignore").exists() and absolute_path.name != ".gitignore": + return True + + return False + + +# ---------------------------------------------------------------------- +# Indexing +# ---------------------------------------------------------------------- + +def _make_node_info(path: Path) -> NodeInfo: + if path.is_file(): + path_type = "file" + elif path.is_dir(): + path_type = "dir" + else: + path_type = "other" + + try: + mtime = path.stat().st_mtime + except OSError: + mtime = 0.0 + + return NodeInfo( + path_type=path_type, + mtime=mtime, + is_leaf_flag=False, + ) + + +def index_project(project_root: Path) -> Dict[Path, NodeInfo]: + """ + Walk an entire project tree and build a dictionary mapping + relative paths -> NodeInfo, applying the ignore filter. + + The relative path Path(".") is used for the project root. 
+ """ + project_root = project_root.resolve() + node_dict: Dict[Path, NodeInfo] = {} + + # Always register the root + root_rel_path = Path(".") + node_dict[root_rel_path] = _make_node_info(project_root) + + for dirpath_str, dirnames_list, filenames_list in os.walk(project_root): + dirpath = Path(dirpath_str) + rel_dir = dirpath.relative_to(project_root) + + # Filter directory traversal in-place so os.walk will skip ignored dirs. + keep_dirnames_list: List[str] = [] + for dirname in dirnames_list: + child_rel = (rel_dir / dirname) if rel_dir != Path(".") else Path(dirname) + if should_ignore_node(project_root, child_rel): + continue + keep_dirnames_list.append(dirname) + dirnames_list[:] = keep_dirnames_list + + # Ensure directory node itself is recorded + if rel_dir not in node_dict: + node_dict[rel_dir] = _make_node_info(dirpath) + + # Record files + for filename in filenames_list: + rel_file = (rel_dir / filename) if rel_dir != Path(".") else Path(filename) + if should_ignore_node(project_root, rel_file): + continue + abs_file = dirpath / filename + node_dict[rel_file] = _make_node_info(abs_file) + + # Second pass: mark leaf directories + parent_dir_set: Set[Path] = set() + for rel_path in node_dict.keys(): + if rel_path == Path("."): + continue + parent = rel_path.parent + parent_dir_set.add(parent) + + for rel_path, info in node_dict.items(): + if info.path_type == "dir": + if rel_path not in parent_dir_set: + info.is_leaf_flag = True + + return node_dict diff --git a/tool/skeleton/CLI.py b/tool/skeleton/CLI.py new file mode 100755 index 0000000..18467cd --- /dev/null +++ b/tool/skeleton/CLI.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- + +""" +CLI.py - command classification and debug printer + +Grammar (informal): + + check * [] + + :: | | + + :: version | help | usage + :: environment + :: structure | import | export | suspicious | addendum | all + +Commands are sorted into 
three sets: + 1. HELP_COMMANDS + 2. NO_OTHER_COMMANDS + 3. HAS_OTHER_COMMANDS + +At runtime, argv commands are classified into four lists: + 1. help_list + 2. no_other_list + 3. has_other_list + 4. unclassified_list + +If the meta debug set contains the tag "Command", these four lists +are printed. + +If 'environment' appears in no_other_list, the meta.printenv() helper +is invoked to print the environment. +""" + +from __future__ import annotations + +import sys +from typing import Sequence +import meta +import doc +import skeleton + +meta.debug_set("print_command_lists") + +# print_command_lists tag sets (classification universe) +HELP_COMMANDS: set[str] = set([ + "version" + ,"help" + ,"usage" +]) + +NO_OTHER_COMMANDS: set[str] = set([ + "environment" +]) + +HAS_OTHER_COMMANDS: set[str] = set([ + "structure" + ,"import" + ,"export" + ,"suspicious" + ,"addendum" + ,"all" +]) + + +def command_type(arg: str) -> str: + """ + Classify a single command token. + + Returns: + "Help" if arg is a help command + "NoOther" if arg is a no_other command + "HasOther" if arg is a has_other command + "UnClassified" otherwise + """ + if arg in HELP_COMMANDS: + return "Help" + + if arg in NO_OTHER_COMMANDS: + return "NoOther" + + if arg in HAS_OTHER_COMMANDS: + return "HasOther" + + return "UnClassified" + + +def print_command_lists( + help_list: list[str] + ,no_other_list: list[str] + ,has_other_list: list[str] + ,unclassified_list: list[str] +) -> None: + """ + Print the four classified command lists derived from argv. + """ + print("help_list:", help_list) + print("no_other_list:", no_other_list) + print("has_other_list:", has_other_list) + print("unclassified_list:", unclassified_list) + + +def CLI(argv: Sequence[str] | None = None) -> int: + """ + CLI entrypoint. + + Responsibilities: + 1. Accept argv (or sys.argv[1:] by default). + 2. Classify each argument using command_type(). + 3. Invoke behaviors implied by the commands. + 4. Return integer status code. 
+ + Behavior (current): + 1. Build four lists, in argv order: + - help_list + - no_other_list + - has_other_list + - unclassified_list + 2. If "print_command_lists" is enabled in meta's debug set, print those lists. + 3. If 'environment' is present in no_other_list, call meta.printenv(). + 4. If any help commands appear, handle them and return 1. + """ + if argv is None: + argv = sys.argv[1:] + + help_list: list[str] = [] + no_other_list: list[str] = [] + has_other_list: list[str] = [] + unclassified_list: list[str] = [] + + for arg in argv: + ct = command_type(arg) + + if ct == "Help": + help_list.append(arg) + elif ct == "NoOther": + no_other_list.append(arg) + elif ct == "HasOther": + has_other_list.append(arg) + else: + unclassified_list.append(arg) + + if meta.debug_has("print_command_lists"): + print_command_lists( + help_list + ,no_other_list + ,has_other_list + ,unclassified_list + ) + + if len(help_list) > 0: + if "version" in help_list: + meta.version_print() + if "usage" in help_list: + doc.print_usage() + if "help" in help_list: + doc.print_help() + return 1 + + status,Harmony_root = skeleton.where_is_Harmony() + if status == 'different': + print("Seems we are not running in the Harmony project, will exit.") + return 2 + if status == 'not-found': + print("Harmony project not found, normally this command is run from with Harmony.") + return 3 + + ret_val = 0 + if "environment" in no_other_list: + env_status = meta.printenv() + if env_status != 0: + ret_val = env_status + + return ret_val + +if __name__ == "__main__": + raise SystemExit(CLI()) diff --git a/tool/skeleton/GitIgnore.py b/tool/skeleton/GitIgnore.py new file mode 100755 index 0000000..52f3f34 --- /dev/null +++ b/tool/skeleton/GitIgnore.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- + +""" +gitignore.py - minimal GitIgnore-style helper for Harmony projects + +GitIgnore instances track which parts of a project 
tree are governed by +per-directory '.gitignore' files (excluding the top-level one). + +Heuristic: + + 1. During initialization, traverse the project tree rooted at + . + + 2. If the top-level directory (the project root) has a '.gitignore' + file, ignore it for the purposes of this helper. + + 3. For any other directory in the tree that contains a file literally + named '.gitignore', record that directory's relative path (from the + project root) into an internal set — but only if none of its + prefixes are already present in the set. + + 4. check() -> token: + - If ANY prefix of is found in the set, return 'Ignore'. + - Otherwise, return 'Accept'. +""" + +from __future__ import annotations + +import os +from typing import Set + +import skeleton + + +class GitIgnore: + """ + GitIgnore(project_path) + + Attributes: + project_path: + Absolute path to the project root. + prefixes: + A set of relative directory paths (from project root) where a + '.gitignore' file exists, excluding the top-level project root. + """ + + def __init__( + self + ,project_path: str + ) -> None: + """ + Initialize a GitIgnore instance with a path to a project and + scan for per-directory '.gitignore' files (excluding the root). + """ + self.project_path: str = os.path.abspath(project_path) + self.prefixes: Set[str] = set() + + self._scan_project() + + def _scan_project(self) -> None: + """ + Traverse the project tree and populate self.prefixes with the + relative paths of directories (excluding the project root) + that contain a '.gitignore' file. + + Only add a directory if none of its prefixes are already in + self.prefixes. This avoids redundant entries such as: + + tool_shared/third_party + tool_shared/third_party/upstream + """ + root = self.project_path + + for dirpath, dirnames, filenames in os.walk(root, topdown=True): + rel_dir = os.path.relpath(dirpath, root) + + # Normalize '.' 
to empty for clarity + if rel_dir == ".": + rel_dir = "" + + if ".gitignore" not in filenames: + continue + + # Skip the top-level '.gitignore' (if any) + if rel_dir == "": + continue + + # Only add this directory if none of its prefixes are + # already present in self.prefixes. + parts = rel_dir.split(os.sep) + prefix = None + skip = False + + for part in parts: + if prefix is None: + prefix = part + else: + prefix = os.path.join(prefix, part) + + if prefix in self.prefixes: + skip = True + break + + if skip: + continue + + self.prefixes.add(rel_dir) + + def check( + self + ,path: str + ) -> str: + """ + Check a path against the recorded '.gitignore' directories. + + path: + A path relative to the project root. + + Returns: + 'Ignore' if any prefix of is in self.prefixes, otherwise + 'Accept'. + """ + # Normalize the incoming path to a consistent form + norm = os.path.normpath(path) + + # If the path is '.' or empty, treat as root-relative with no prefix + if norm in ("", "."): + return "Accept" + + parts = norm.split(os.sep) + + # Build incremental prefixes: 'a', 'a/b', 'a/b/c', ... + prefix = None + for part in parts: + if prefix is None: + prefix = part + else: + prefix = os.path.join(prefix, part) + + if prefix in self.prefixes: + return "Ignore" + + return "Accept" + + +def test_GitIgnore() -> int: + """ + 1. Locate the Harmony project root using skeleton.where_is_Harmony(). + 2. Create a GitIgnore instance rooted at that path. + 3. Print a list of all directories (relative) that would be ignored. 
+ """ + status, Harmony_root = skeleton.where_is_Harmony() + + if status == "not-found": + print("Harmony project not found; cannot test GitIgnore.") + return 1 + + if status == "different": + print("Warning: Harmony not found, using nearest .git directory for GitIgnore test.") + + gi = GitIgnore(Harmony_root) + + print("GitIgnore directories (relative to Harmony root):") + for rel in sorted(gi.prefixes): + print(rel) + + return 0 + + +if __name__ == "__main__": + raise SystemExit(test_GitIgnore()) diff --git a/tool/skeleton/check b/tool/skeleton/check new file mode 120000 index 0000000..45a8ec1 --- /dev/null +++ b/tool/skeleton/check @@ -0,0 +1 @@ +CLI.py \ No newline at end of file diff --git a/tool/skeleton/doc.py b/tool/skeleton/doc.py new file mode 100644 index 0000000..4fee15a --- /dev/null +++ b/tool/skeleton/doc.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- + +""" +doc.py - usage and help text for the Harmony 'check' tool + +Grammar (informal): + + * [] + + :: | | + + :: version | help | usage + :: environment + :: structure | import | export | suspicious | addendum | all +""" + +from __future__ import annotations + +import os +import sys +from typing import TextIO + + +def prog_name() -> str: + """ + Return the program name as invoked by the user. + + Typically: + - basename(sys.argv[0]) when running from the shell. + - Falls back to 'check' if argv[0] is empty. + """ + raw = sys.argv[0] if sys.argv and sys.argv[0] else "check" + base = os.path.basename(raw) or raw + return base + + +def _usage_text(prog: str) -> str: + return f"""\ +Usage: + {prog} * [] + +Where: + :: | | + + :: version | help | usage + :: environment + :: structure | import | export | suspicious | addendum | all +""" + + +def _help_text(prog: str) -> str: + return f"""\ +{prog} — Harmony skeleton integrity and metadata checker + +For now: + This is a placeholder help message. 
+ + The tool accepts one or more tokens and an optional + argument. Each is classified as one of: + + - (version, help, usage) + - (environment) + - (structure, import, export, suspicious, addendum, all) + +Detailed behavior for each command will be documented here as the +implementation is completed. +""" + + +def print_usage( + stream: TextIO | None = None +) -> None: + """ + Print the usage text to the given stream (default: sys.stdout), + using the actual program name as invoked. + """ + if stream is None: + stream = sys.stdout + + text = _usage_text(prog_name()) + stream.write(text) + if not text.endswith("\n"): + stream.write("\n") + + +def print_help( + stream: TextIO | None = None +) -> None: + """ + Print the help text to the given stream (default: sys.stdout), + using the actual program name as invoked. + """ + if stream is None: + stream = sys.stdout + + utext = _usage_text(prog_name()) + htext = _help_text(prog_name()) + + stream.write(utext) + if not utext.endswith("\n"): + stream.write("\n") + + stream.write("\n") + stream.write(htext) + if not htext.endswith("\n"): + stream.write("\n") diff --git a/tool/skeleton/load_command_module.py b/tool/skeleton/load_command_module.py new file mode 100644 index 0000000..226b6dd --- /dev/null +++ b/tool/skeleton/load_command_module.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- + +""" +load_command_module.py - locate and import Python command modules from $PATH + +Behavior: + 1. Search $PATH for an executable with the given command name. + 2. Prefer a path containing '/incommon/'. + 3. If only /usr/bin/ is found, raise an error saying we were + looking for the incommon version. + 4. Import the chosen script as a Python module, even if it has no .py + extension, by forcing a SourceFileLoader. 
+""" + +from __future__ import annotations + +import importlib.util +import os +from importlib.machinery import SourceFileLoader +from types import ModuleType +from typing import List + + +def _find_command_candidates(command_name: str) -> List[str]: + """ + Return a list of absolute paths to executables named `command_name` + found on $PATH. + """ + paths: list[str] = [] + + path_env = os.environ.get("PATH", "") + for dir_path in path_env.split(os.pathsep): + if not dir_path: + continue + candidate = os.path.join(dir_path, command_name) + if os.path.isfile(candidate) and os.access(candidate, os.X_OK): + paths.append(os.path.realpath(candidate)) + + return paths + + +def load_command_module(command_name: str) -> ModuleType: + """ + Locate an executable named `command_name` on $PATH and load it + as a Python module. + + Selection policy: + 1. Prefer any path containing '/incommon/'. + 2. If only /usr/bin/ candidates exist, raise an error + saying we were looking for the incommon version. + 3. If no candidate is found, raise an error. + + Implementation detail: + Because the incommon command may lack a .py suffix, we explicitly + construct a SourceFileLoader rather than relying on the default + extension-based loader resolution. + """ + candidates = _find_command_candidates(command_name) + + incommon_candidates = [ + p + for p in candidates + if "/incommon/" in p + ] + + usrbin_candidates = [ + p + for p in candidates + if p.startswith("/usr/bin/") + ] + + if incommon_candidates: + target = incommon_candidates[0] + elif usrbin_candidates: + raise RuntimeError( + f"Found /usr/bin/{command_name}, but expected the incommon Python " + f"{command_name} module on PATH." + ) + else: + raise RuntimeError( + f"Could not find an incommon '{command_name}' module on PATH." 
+ ) + + module_name = f"rt_incommon_{command_name}" + + loader = SourceFileLoader( + module_name + ,target + ) + spec = importlib.util.spec_from_loader( + module_name + ,loader + ) + if spec is None: + raise RuntimeError(f"Failed to create spec for {command_name} from {target}") + + module = importlib.util.module_from_spec(spec) + # spec.loader is the SourceFileLoader we just created + assert spec.loader is not None + spec.loader.exec_module(module) + + return module diff --git a/tool/skeleton/meta.py b/tool/skeleton/meta.py new file mode 100644 index 0000000..2025597 --- /dev/null +++ b/tool/skeleton/meta.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- + +""" +meta.py - thin wrappers around command modules + +Current responsibilities: + 1. Load the incommon 'printenv' command module (no .py extension) + using load_command_module.load_command_module(). + 2. Expose printenv() here, calling the imported printenv() work + function with default arguments (equivalent to running without + any CLI arguments). + 3. Provide a simple version printer for this meta module. + 4. Provide a small debug tag API (set/clear/has). +""" + +from __future__ import annotations + +from load_command_module import load_command_module + + +# Load the incommon printenv module once at import time +_PRINTENV_MODULE = load_command_module("printenv") + + +# Meta module version +_major = 1 +_minor = 1 +def version_print() -> None: + """ + Print the meta module version as MAJOR.MINOR. + """ + print(f"{_major}.{_minor}") + + +# Debug tag set and helpers +_debug = set([ +]) + + +def debug_set(tag: str) -> None: + """ + Add a debug tag to the meta debug set. + """ + _debug.add(tag) + + +def debug_clear(tag: str) -> None: + """ + Remove a debug tag from the meta debug set, if present. + """ + _debug.discard(tag) + + +def debug_has(tag: str) -> bool: + """ + Return True if the given debug tag is present. 
+ """ + return tag in _debug + + +# Touch the default tag once so static checkers do not complain about +# unused helpers when imported purely for side-effects. +debug_has("Command") + + +def printenv() -> int: + """ + Call the imported printenv() work function with default arguments: + - no null termination + - no newline quoting + - no specific names (print full environment) + - prog name 'printenv' + """ + return _PRINTENV_MODULE.printenv( + False # null_terminate + ,False # quote_newlines + ,[] # names + ,"printenv" + ) diff --git a/tool/skeleton/skeleton.py b/tool/skeleton/skeleton.py new file mode 100755 index 0000000..2c3b12f --- /dev/null +++ b/tool/skeleton/skeleton.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- + +""" +skeleton.py - helpers for working with the Harmony skeleton tree +""" + +from __future__ import annotations + +import os +import sys +import meta + + +# where_is_Harmony +# +# Context / assumptions: +# 1. This module lives somewhere under the Harmony tree, for example: +# /.../Harmony/tool/skeleton/skeleton.py +# 2. CLI.py is run from somewhere inside the same tree (or a clone). +# +# Search behavior: +# 1. Start from the directory containing this file. +# 2. Walk upward towards the filesystem root, with limits: +# a) Do not move up more than 5 levels. +# b) Stop immediately if the current directory contains a +# '.git' subdirectory. 
+# +# Result classification: +# status is one of: +# 'found' -> we found a directory whose basename is 'Harmony' +# 'different' -> we stopped at a directory that has a '.git' +# subdirectory, but its basename is not 'Harmony' +# 'not-found' -> we hit the 5-level limit or filesystem root +# without finding 'Harmony' or a '.git' directory +# +# Path: +# - In all cases, the returned path is the last directory inspected: +# * the 'Harmony' directory (status 'found'), or +# * the directory with '.git' (status 'different'), or +# * the directory at the 5-level limit / filesystem root +# (status 'not-found'). +# +# Debug printing: +# - If meta.debug_has("print_Harmony_root") is true, print: +# * "The Harmony project root found at: {path}" +# when status == 'found' +# * "Harmony not found, but found: {path}" +# when status == 'different' +# * "Harmony not found." +# when status == 'not-found' +def where_is_Harmony() -> tuple[str, str]: + """ + Locate the Harmony root (or best guess). + + Returns: + (status, path) + """ + here = os.path.abspath(__file__) + d = os.path.dirname(here) + + harmony_root = None + status = "not-found" + + max_up = 5 + steps = 0 + + while True: + base = os.path.basename(d) + + # Case 1: exact 'Harmony' directory name + if base == "Harmony": + harmony_root = d + status = "found" + break + + # Case 2: stop at a directory that has a .git subdirectory + git_dir = os.path.join(d, ".git") + if os.path.isdir(git_dir): + harmony_root = d + if base == "Harmony": + status = "found" + else: + status = "different" + break + + parent = os.path.dirname(d) + + # Stop if we hit filesystem root + if parent == d: + harmony_root = d + status = "not-found" + break + + steps += 1 + if steps > max_up: + # Reached search depth limit; last inspected directory is d + harmony_root = d + status = "not-found" + break + + d = parent + + if harmony_root is None: + # Extremely defensive; in practice harmony_root will be set above. 
+ harmony_root = d + + root_base = os.path.basename(harmony_root) + + # Warning to stderr if we are not literally in a 'Harmony' directory + if root_base != "Harmony": + sys.stderr.write( + f"WARNING: Harmony root basename is '{root_base}', expected 'Harmony'.\n" + ) + + if meta.debug_has("print_Harmony_root"): + if status == "found": + print(f"The Harmony project root found at: {harmony_root}") + elif status == "different": + print(f"Harmony not found, but found: {harmony_root}") + else: + print("Harmony not found.") + + return status, harmony_root + +def test(): + meta.debug_set("print_Harmony_root") + status,Harmony_root = where_is_Harmony() + +if __name__ == "__main__": + raise SystemExit(test()) + + diff --git a/tool/skeleton_diff_core.py b/tool/skeleton_diff_core.py deleted file mode 100644 index 3bd6401..0000000 --- a/tool/skeleton_diff_core.py +++ /dev/null @@ -1,696 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- -# TODO: -# - Properly parse and apply .gitignore patterns instead of the current -# heuristic: -# * At project root: .gitignore is treated as a no-op (discern always -# returns "Accept"). -# * Below root: .gitignore causes the entire directory subtree to be -# ignored (except for the .gitignore file itself). -# - Integrate real .gitignore parsing into the GitIgnore discern functions -# and remove the "ignore whole subtree" simplification. -""" -skeleton_diff_core — Harmony Skeleton Auditor, core logic - -Version: Major.Minor = 0.5 -Author: Thomas Walker Lynch, with Grok and Vaelorin -Date: 2025-11-18 - -This module holds the core data structures and algorithms for comparing -a Harmony project (the skeleton) against another project () that was -cloned or derived from it. 
- -CLI and documentation live in separate modules: - - skeleton_diff_docs.py (usage/help text) - - skeleton_check (CLI front end) -""" - -from __future__ import annotations - -import hashlib -import os -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Dict, List, Optional, Set, Tuple - -# ---------------------------------------------------------------------- -# Version -# ---------------------------------------------------------------------- -MAJOR = 0 -MINOR = 5 -VERSION = f"{MAJOR}.{MINOR}" - -# ---------------------------------------------------------------------- -# Harmony root -# ---------------------------------------------------------------------- -HARMONY_ROOT = Path(os.getenv("REPO_HOME", str(Path.cwd()))).resolve() -if not HARMONY_ROOT.exists(): - print("ERROR: $REPO_HOME not set or invalid. Source env_toolsmith.", file=sys.stderr) - sys.exit(1) - -# ---------------------------------------------------------------------- -# Types -# ---------------------------------------------------------------------- -DiscernResult = str # "Accept" or "Ignore" -DiscernFn = Callable[[Path, "NodeInfo"], DiscernResult] - - -@dataclass -class NodeInfo: - """Filesystem node information for comparison.""" - - is_file: bool - is_dir: bool - is_other: bool - is_leaf: bool - mtime: Optional[float] - checksum: Optional[str] - - -@dataclass -class ComparisonResults: - """Results of comparing Harmony skeleton to .""" - - missing_list: List[Path] - present_but_ignored_list: List[Path] - newer_list: List[Path] - older_list: List[Path] - different_list: List[Path] - addendum_list: List[Path] - - -# ---------------------------------------------------------------------- -# GitIgnore support -# ---------------------------------------------------------------------- -class GitIgnore: - """ - Simplified .gitignore handler based on a stack of discern functions. 
- - Each entry in the stack is: - (discern_fn, scope_dir_rel) - - Where: - - discern_fn(rel_path, node_info) -> "Accept" | "Ignore" - - scope_dir_rel is the directory *containing* the .gitignore file - that produced this discern_fn. - - The current implementation does not parse .gitignore patterns. Instead, - parse_gitignore() returns one of two heuristics (see TODO at top of file). - """ - - def __init__(self, project_root: Path) -> None: - self.project_root = project_root - self._stack: List[Tuple[DiscernFn, Path]] = [] - - def push(self, scope_dir_rel: Path, discern_fn: DiscernFn) -> None: - self._stack.append((discern_fn, scope_dir_rel)) - - def pop(self) -> None: - if self._stack: - self._stack.pop() - - def check(self, rel_path: Path, node_info: NodeInfo) -> DiscernResult: - """ - Apply discern functions from top of stack down. If any returns "Ignore", - we return "Ignore". If none do, we return "Accept". - """ - # Most specific rules are near the top. - for discern_fn, _scope_dir_rel in reversed(self._stack): - decision = discern_fn(rel_path, node_info) - if decision == "Ignore": - return "Ignore" - return "Accept" - - -def parse_gitignore( - project_root: Path, - gitignore_rel_path: Path, - node_info: NodeInfo, -) -> DiscernFn: - """ - Stub .gitignore parser. - - For now: - - If the .gitignore is at the project root (scope directory == "."), - return a discern function that always returns "Accept". - - Otherwise, return a discern function that ignores the entire subtree - under the directory that contains the .gitignore file, except for - the .gitignore file itself. - - This is intentionally simple and marked as a TODO for future improvement. - """ - scope_dir_rel = gitignore_rel_path.parent - - if scope_dir_rel == Path("."): - def discern_root(rel_path: Path, node_info_: NodeInfo) -> DiscernResult: - # Heuristic: root-level .gitignore does nothing until we implement - # real parsing. 
- return "Accept" - return discern_root - - def discern_subtree(rel_path: Path, node_info_: NodeInfo) -> DiscernResult: - # Always accept the .gitignore file itself. - if rel_path == gitignore_rel_path: - return "Accept" - # Ignore everything under the scope directory. - if len(scope_dir_rel.parts) <= len(rel_path.parts): - if rel_path.parts[: len(scope_dir_rel.parts)] == scope_dir_rel.parts: - return "Ignore" - return "Accept" - - return discern_subtree - - -# ---------------------------------------------------------------------- -# Built-in ignore patterns (independent of .gitignore) -# ---------------------------------------------------------------------- -def is_builtin_ignored(rel_path: Path) -> bool: - """ - Quick filter for paths we always ignore, regardless of .gitignore. - - Patterns: - - Any path under a ".git" directory - - __pycache__ directories - - .ipynb_checkpoints - - .pytest_cache - - Python bytecode files: *.pyc, *.pyo, *.pyd, *.py[cod] - - Editor backups: *~, *.bak - """ - parts = rel_path.parts - if not parts: - return False - - if ".git" in parts: - return True - - basename = parts[-1] - - if basename in { - "__pycache__", - ".ipynb_checkpoints", - ".pytest_cache", - }: - return True - - if ( - basename.endswith(".pyc") - or basename.endswith(".pyo") - or basename.endswith(".pyd") - ): - return True - - if basename.endswith("~") or basename.endswith(".bak"): - return True - - return False - - -# ---------------------------------------------------------------------- -# NodeInfo helpers -# ---------------------------------------------------------------------- -def make_node_info( - abs_path: Path, - compute_checksum_flag: bool, -) -> NodeInfo: - is_dir_flag = abs_path.is_dir() - is_file_flag = abs_path.is_file() - is_other_flag = not (is_dir_flag or is_file_flag) - - try: - stat_obj = abs_path.stat() - mtime_value = stat_obj.st_mtime - except OSError: - mtime_value = None - - checksum_value: Optional[str] = None - if compute_checksum_flag and 
is_file_flag: - checksum_value = compute_checksum(abs_path) - - # Leaf determination is done in a second pass after indexing. - return NodeInfo( - is_file=is_file_flag, - is_dir=is_dir_flag, - is_other=is_other_flag, - is_leaf=False, - mtime=mtime_value, - checksum=checksum_value, - ) - - -def compute_checksum(abs_path: Path) -> str: - """Compute a SHA256 checksum for a file.""" - sha = hashlib.sha256() - try: - with abs_path.open("rb") as f_obj: - while True: - block = f_obj.read(65536) - if not block: - break - sha.update(block) - except OSError: - # On error, return a sentinel so we can still compare deterministically. - return "ERROR" - return sha.hexdigest() - - -# ---------------------------------------------------------------------- -# Project indexing -# ---------------------------------------------------------------------- -def index_project( - project_root: Path, - compute_checksum_flag: bool, -) -> Tuple[Dict[Path, NodeInfo], Set[Path]]: - """ - Build an index for a project tree. - - Returns: - (info_dict, ignored_set) - - Where: - - info_dict maps relative paths -> NodeInfo for all *accepted* nodes. - - ignored_set is the set of relative paths that were skipped due to - built-in ignore patterns or GitIgnore rules. - """ - info_dict: Dict[Path, NodeInfo] = {} - ignored_set: Set[Path] = set() - gitignore_obj = GitIgnore(project_root) - - def recurse(dir_rel_path: Path) -> None: - abs_dir_path = project_root / dir_rel_path - - # Handle .gitignore in this directory (if any) - gitignore_pushed_flag = False - gitignore_abs_path = abs_dir_path / ".gitignore" - if gitignore_abs_path.exists() and gitignore_abs_path.is_file(): - gitignore_rel_path = ( - dir_rel_path / ".gitignore" - if dir_rel_path != Path(".") - else Path(".gitignore") - ) - - node_info = make_node_info( - gitignore_abs_path, - compute_checksum_flag=False, - ) - - # Existing rules decide whether .gitignore itself is ignored. 
- decision = gitignore_obj.check(gitignore_rel_path, node_info) - if decision == "Ignore": - ignored_set.add(gitignore_rel_path) - else: - # Accept the .gitignore file and push a new discern function. - discern_fn = parse_gitignore( - project_root, - gitignore_rel_path, - node_info, - ) - gitignore_obj.push(dir_rel_path, discern_fn) - gitignore_pushed_flag = True - - # Walk directory contents - try: - entry_iter = sorted(abs_dir_path.iterdir(), key=lambda p: p.name) - except OSError: - # If we cannot list this directory, treat it as unreadable. - if gitignore_pushed_flag: - gitignore_obj.pop() - return - - for abs_entry_path in entry_iter: - entry_name = abs_entry_path.name - if entry_name == ".gitignore": - # Already handled above. - continue - - if dir_rel_path == Path("."): - rel_path = Path(entry_name) - else: - rel_path = dir_rel_path / entry_name - - # Built-in ignore filter first. - if is_builtin_ignored(rel_path): - ignored_set.add(rel_path) - if abs_entry_path.is_dir(): - # Do not recurse into ignored directories. - continue - continue - - node_info = make_node_info( - abs_entry_path, - compute_checksum_flag, - ) - - decision = gitignore_obj.check(rel_path, node_info) - if decision == "Ignore": - ignored_set.add(rel_path) - if abs_entry_path.is_dir(): - # Do not recurse into ignored directories. - continue - continue - - # Accepted node: record its info. - info_dict[rel_path] = node_info - - if abs_entry_path.is_dir(): - recurse(rel_path) - - # Pop the .gitignore rule for this directory scope, if any. - if gitignore_pushed_flag: - gitignore_obj.pop() - - # Start at project root (".") - recurse(Path(".")) - - # Second pass: determine leaf nodes. - # Initialize all as leaf, then mark parents as non-leaf. 
- for node_info in info_dict.values(): - node_info.is_leaf = True - - for rel_path in info_dict.keys(): - parent_rel_path = rel_path.parent - if parent_rel_path in info_dict: - info_dict[parent_rel_path].is_leaf = False - - return info_dict, ignored_set - - -# ---------------------------------------------------------------------- -# Comparison -# ---------------------------------------------------------------------- -def has_children( - info_dict: Dict[Path, NodeInfo], - parent_rel_path: Path, -) -> bool: - """Return True if any node in info_dict is a strict descendant of parent.""" - parent_parts = parent_rel_path.parts - parent_len = len(parent_parts) - if parent_len == 0: - # Parent is root; any non-root path counts as a child. - for rel_path in info_dict.keys(): - if rel_path != Path("."): - return True - return False - - for rel_path in info_dict.keys(): - if rel_path == parent_rel_path: - continue - if len(rel_path.parts) <= parent_len: - continue - if rel_path.parts[:parent_len] == parent_parts: - return True - return False - - -def compare_harmony_to_other( - harmony_root: Path, - other_root: Path, - compute_checksum_flag: bool, -) -> ComparisonResults: - """ - Compare Harmony (skeleton) to and produce the main lists: - - - missing_list - - present_but_ignored_list - - newer_list - - older_list - - different_list - - addendum_list - """ - harmony_info_dict, _harmony_ignored_set = index_project( - harmony_root, - compute_checksum_flag, - ) - - other_info_dict, other_ignored_set = index_project( - other_root, - compute_checksum_flag, - ) - - missing_list: List[Path] = [] - present_but_ignored_list: List[Path] = [] - newer_list: List[Path] = [] - older_list: List[Path] = [] - different_list: List[Path] = [] - addendum_set: Set[Path] = set() - - other_keys_set = set(other_info_dict.keys()) - - # First pass: walk Harmony skeleton dictionary. - for rel_path, harmony_info in harmony_info_dict.items(): - # 2.2.1: if the relative path is in the ignored set. 
- if rel_path in other_ignored_set: - present_but_ignored_list.append(rel_path) - continue - - other_info = other_info_dict.get(rel_path) - if other_info is None: - # 2.2.2.2.1.1: missing in . - missing_list.append(rel_path) - continue - - # 2.2.2.2.1.2: skeleton leaf vs non-leaf in . - if ( - harmony_info.is_dir - and harmony_info.is_leaf - and other_info.is_dir - and has_children(other_info_dict, rel_path) - ): - # Add all descendants of this directory in to addendum. - parent_parts = rel_path.parts - parent_len = len(parent_parts) - for candidate_rel in other_keys_set: - if candidate_rel == rel_path: - continue - if len(candidate_rel.parts) <= parent_len: - continue - if candidate_rel.parts[:parent_len] == parent_parts: - addendum_set.add(candidate_rel) - - # 2.2.2.2.1.3: modification time comparison (and optional checksum). - if harmony_info.mtime is not None and other_info.mtime is not None: - if other_info.mtime > harmony_info.mtime: - newer_list.append(rel_path) - elif other_info.mtime < harmony_info.mtime: - older_list.append(rel_path) - else: - if ( - compute_checksum_flag - and harmony_info.checksum is not None - and other_info.checksum is not None - and harmony_info.checksum != other_info.checksum - ): - different_list.append(rel_path) - - # Second pass: addendum nodes that do not correspond to any skeleton entry. 
- for other_rel_path in other_keys_set: - if other_rel_path not in harmony_info_dict: - addendum_set.add(other_rel_path) - - addendum_list = sorted(addendum_set) - - missing_list.sort() - present_but_ignored_list.sort() - newer_list.sort() - older_list.sort() - different_list.sort() - - return ComparisonResults( - missing_list=missing_list, - present_but_ignored_list=present_but_ignored_list, - newer_list=newer_list, - older_list=older_list, - different_list=different_list, - addendum_list=addendum_list, - ) - - -# ---------------------------------------------------------------------- -# Cached comparison for command handlers -# ---------------------------------------------------------------------- -_cached_other_root: Optional[Path] = None -_cached_checksum_flag: bool = False -_cached_results: Optional[ComparisonResults] = None - - -def ensure_comparison( - other_root: Path, - compute_checksum_flag: bool = False, -) -> ComparisonResults: - global _cached_other_root - global _cached_checksum_flag - global _cached_results - - other_root_resolved = other_root.resolve() - - if ( - _cached_results is None - or _cached_other_root != other_root_resolved - or _cached_checksum_flag != compute_checksum_flag - ): - _cached_results = compare_harmony_to_other( - HARMONY_ROOT, - other_root_resolved, - compute_checksum_flag, - ) - _cached_other_root = other_root_resolved - _cached_checksum_flag = compute_checksum_flag - - return _cached_results - - -# ---------------------------------------------------------------------- -# Work functions (called by CLI) -# ---------------------------------------------------------------------- -def work_environment() -> int: - print("=== Environment ===") - print(f"REPO_HOME = {HARMONY_ROOT}") - for key, value in sorted(os.environ.items()): - if key.startswith(("HARMONY_", "REPO_", "PATH")) or "tool" in key.lower(): - print(f"{key} = {value}") - return 0 - - -def work_structure(other_root: Path) -> int: - results = ensure_comparison(other_root, 
compute_checksum_flag=False) - - print("=== Structure / Presence ===") - - if results.missing_list: - print("Missing Harmony paths in :") - for rel_path in results.missing_list: - print(f" [MISSING] {rel_path}") - print() - else: - print("No missing skeleton paths found in .") - print() - - if results.present_but_ignored_list: - print("Paths present in but ignored by its .gitignore / filters:") - for rel_path in results.present_but_ignored_list: - print(f" [IGNORED] {rel_path}") - print() - else: - print("No skeleton paths are masked by 's ignore rules.") - - return 0 - - -def work_age(other_root: Path, checksum_flag: bool = False) -> int: - results = ensure_comparison(other_root, compute_checksum_flag=checksum_flag) - - print("=== File Age Comparison ===") - - if results.newer_list: - print("Paths newer in (import candidates):") - for rel_path in results.newer_list: - print(f" [NEWER] {rel_path}") - print() - else: - print("No paths are newer in than in Harmony.") - print() - - if results.older_list: - print("Paths older in (export candidates):") - for rel_path in results.older_list: - print(f" [OLDER] {rel_path}") - print() - else: - print("No paths are older in than in Harmony.") - print() - - if checksum_flag and results.different_list: - print("Paths with equal mtime but different checksum (suspicious):") - for rel_path in results.different_list: - print(f" [DIFFERENT] {rel_path}") - print() - elif checksum_flag: - print("No checksum-only differences detected.") - print() - - return 0 - - -def work_import(other_root: Path) -> int: - results = ensure_comparison(other_root, compute_checksum_flag=False) - - print("=== Import Commands (newer → Harmony) ===") - - if not results.newer_list: - print(" No newer files in to import.") - return 0 - - for rel_path in results.newer_list: - src = other_root / rel_path - dst = HARMONY_ROOT / rel_path - print(f"cp {src} {dst} # clobbers older Harmony file") - - return 0 - - -def work_export(other_root: Path) -> int: - results 
= ensure_comparison(other_root, compute_checksum_flag=False) - - print("=== Export Commands (Harmony → ) ===") - - if not results.older_list: - print(" No stale files in to export.") - return 0 - - for rel_path in results.older_list: - src = HARMONY_ROOT / rel_path - dst = other_root / rel_path - print(f"cp {src} {dst} # clobbers stale file in ") - - return 0 - - -def work_addendum(other_root: Path) -> int: - results = ensure_comparison(other_root, compute_checksum_flag=False) - - print("=== Addendum: project-local paths in ===") - - if not results.addendum_list: - print(" None found.") - return 0 - - for rel_path in results.addendum_list: - print(f" [ADDENDUM] {rel_path}") - - return 0 - - -def work_suspicious(other_root: Path, checksum_flag: bool = False) -> int: - """ - Suspicious = checksum-only differences (when enabled) plus - present_but_ignored, grouped as "things that deserve a human look". - """ - results = ensure_comparison(other_root, compute_checksum_flag=checksum_flag) - - print("=== Suspicious Paths ===") - - any_flag = False - - if results.present_but_ignored_list: - any_flag = True - print("Skeleton paths masked by 's ignore rules:") - for rel_path in results.present_but_ignored_list: - print(f" [IGNORED] {rel_path}") - print() - - if checksum_flag and results.different_list: - any_flag = True - print("Paths with equal mtime but different checksum:") - for rel_path in results.different_list: - print(f" [DIFFERENT] {rel_path}") - print() - - if not any_flag: - print(" None found.") - - return 0 - - -def work_version() -> int: - print(f"skeleton_diff version {VERSION}") - return 0 diff --git a/tool/skeleton_diff_docs.py b/tool/skeleton_diff_docs.py deleted file mode 100644 index f79241b..0000000 --- a/tool/skeleton_diff_docs.py +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- -""" -skeleton_diff_docs — usage and help text for skeleton_diff / skeleton_check 
-""" - -from __future__ import annotations - -from pathlib import Path -import sys - -from skeleton_diff_core import VERSION - - -def work_usage() -> int: - program_name = Path(sys.argv[0]).name or "skeleton_check" - - print(f"Usage: {program_name} []... []") - print() - print(" is required if any of the specified commands") - print("require a project to analyze.") - print() - print("Commands:") - print(" version Show program version (Major.Minor)") - print(" help Long-form documentation") - print(" usage This short summary") - print(" environment Show key environment variables (including $REPO_HOME)") - print(" structure Compare skeleton presence vs (missing / ignored)") - print(" age Compare file ages (newer / older)") - print(" import Print shell commands for pulling newer skeleton") - print(" paths from into Harmony") - print(" export Print shell commands for pushing current skeleton") - print(" paths from Harmony into ") - print(' suspicious Show paths masked by ignore rules and checksum-only') - print(" differences (when checksum mode is enabled)") - print(" addendum List project-local paths in that do not exist") - print(" in the Harmony skeleton or that live under skeleton") - print(" leaf directories") - print(" all Run the full set of analyses for a project") - print() - print("Examples:") - print(f" {program_name} usage") - print(f" {program_name} structure ../subu") - print(f" {program_name} all ../subu") - print() - print(f"Run '{program_name} help' for detailed explanations.") - return 0 - - -def work_help() -> int: - help_text = f""" -skeleton_diff — Harmony Skeleton Auditor -======================================== - -Version: {VERSION} - -1. Purpose -1.1 The skeleton_diff tool compares a Harmony project (the skeleton) with - another project () that was originally cloned from Harmony. -1.2 Over time, individual projects tend to evolve: - - Some improvements are made in projects but never pulled back to the - Harmony skeleton. 
- - Some improvements make it back into Harmony, leaving older projects - with stale copies of skeleton files. - - Extra directories and files appear in projects, some intentional and - some accidental. -1.3 skeleton_diff helps you see that drift clearly so that you can: - - Pull newer tooling back into the skeleton. - - Push newer skeleton files out into projects. - - Spot suspicious clutter, ignored paths, and structural misuse. - -2. Invocation and Argument Rules -2.1 Basic command line form: - skeleton_check []... [] -2.2 is required if any of the specified commands - require a project to analyze. -2.3 Commands are parsed from left to right as a list. The final argument - is interpreted as only if: - 2.3.1 At least one command that requires a project appears earlier in - the argument list, and - 2.3.2 There is at least one argument left after that command. -2.4 Dominating commands: - 2.4.1 If any of the following appear anywhere on the command line: - usage, help, version - then that command is executed and all other arguments are - ignored (including other commands and paths). -2.5 Commands that require : - 2.5.1 structure - 2.5.2 age - 2.5.3 import - 2.5.4 export - 2.5.5 suspicious - 2.5.6 addendum - 2.5.7 all (which expands to a sequence of project commands) -2.6 Commands that do not require a project: - 2.6.1 version - 2.6.2 help - 2.6.3 usage - 2.6.4 environment -2.7 Missing project argument: - 2.7.1 If the first command that requires a project is also the last - argument, there is no argument left to serve as - , and skeleton_check reports an error. - 2.7.2 If a command that requires a project appears before the last - argument, the last argument is interpreted as , even if - its spelling matches a command name. -2.8 Effect of “all”: - 2.8.1 The special command “all” is shorthand for: - environment, structure, age, import, export, suspicious, addendum - 2.8.2 “all” may not be combined with other commands. 
If present, it - must be the only non-dominating command on the line. - -3. Environment Expectations -3.1 Before running skeleton_check you are expected to: - 3.1.1 Be inside a Harmony-derived project. - 3.1.2 Have already run: - source env_toolsmith - which in turn sources: - tool_shared/bespoke/env - 3.1.3 Have $REPO_HOME set to your Harmony project root. -3.2 All skeleton paths are derived from: - $REPO_HOME -3.3 The tool does not modify any files. It only reports differences and - prints suggested copy commands for you to run (or edit) manually. - -4. Core Concepts -4.1 Harmony skeleton dictionary - 4.1.1 The Harmony tree (under $REPO_HOME) is traversed once to build - a dictionary mapping relative paths to node information - (NodeInfo: type, leaf flag, mtime, and optional checksum). - 4.1.2 This dictionary is the authoritative description of the skeleton. -4.2 dictionary - 4.2.1 The tree is traversed similarly, with its own GitIgnore - instance and built-in ignore filters. - 4.2.2 The dictionary is authoritative for what actually - contains, including paths that are between and below the skeleton. -4.3 Ignore handling - 4.3.1 A GitIgnore class holds a stack of discern functions that each - accept or ignore nodes based on their relative path and NodeInfo. - 4.3.2 The current implementation does not parse .gitignore patterns. - For non-root .gitignore files, the entire subtree under that - directory is ignored (except for the .gitignore itself). This is - a simplification with a TODO to replace it with proper parsing. -4.4 Leaf nodes - 4.4.1 Leaf nodes in the Harmony skeleton are paths that have no - accepted descendants under $REPO_HOME. - 4.4.2 When has extra content under a skeleton leaf directory, - that content is treated as addendum (project-local extensions). - -5. Commands (high-level) -5.1 version, help, usage, environment - 5.1.1 version - - Prints: - skeleton_diff version . - 5.1.2 help - - Shows this detailed documentation. 
- 5.1.3 usage - - Shows a short, to-the-point command summary and examples. - 5.1.4 environment - - Prints $REPO_HOME and related Harmony / REPO variables, plus - PATH and selected tool-related variables. -5.2 structure (requires ) - 5.2.1 Uses the skeleton and dictionaries to find: - - Paths that exist in Harmony but are missing in - ([MISSING] entries). - - Paths that exist in both Harmony and , but where the - path is ignored by its .gitignore / filters - ([IGNORED] entries). -5.3 age (requires ) - 5.3.1 Compares mtimes between Harmony and for paths that - exist in both: - - NEWER: mtime > Harmony mtime (import candidates). - - OLDER: mtime < Harmony mtime (export candidates). - 5.3.2 With checksum mode enabled, paths with equal mtime but different - content (different checksum) are reported as DIFFERENT. -5.4 import / export (require ) - 5.4.1 import - - Prints cp commands to copy newer paths from back into - Harmony, overwriting older skeleton files (git history keeps - old versions). - 5.4.2 export - - Prints cp commands to copy newer skeleton paths from Harmony - into , overwriting stale project files. -5.5 suspicious (requires ) - 5.5.1 Reports skeleton paths that: - - Are present in but hidden by ignore rules, and/or - - Have equal mtime but different content when checksum mode is - enabled. - 5.5.2 These are the paths that most need human inspection. -5.6 addendum (requires ) - 5.6.1 Reports project-local paths in : - - Any path under a skeleton leaf directory that does not exist - in the skeleton, and - - Any path that appears in but not in the skeleton - dictionary at all. - 5.6.2 These are candidates to remain project-specific or to be pulled - back into the skeleton. - -6. 
Example Workflows -6.1 Inspect a specific project’s drift - 6.1.1 From a Harmony project: - source env_toolsmith - skeleton_check all ../subu -6.2 Import improvements from a project - 6.2.1 Run: - skeleton_check import ../subu -6.3 Refresh a stale project from the skeleton - 6.3.1 Run: - skeleton_check export ../some_project -6.4 Quick documentation and environment checks - 6.4.1 Without a project: - skeleton_check usage - skeleton_check help - skeleton_check version - skeleton_check environment - -7. Safety and Limitations -7.1 No automatic writes - 7.1.1 skeleton_check never changes files itself. It only prints - commands and reports. -7.2 Time-based comparison - 7.2.1 “Newer” and “older” are based on filesystem modification times. - If clocks or timestamps are misleading, results may need manual - interpretation. -7.3 Ignore semantics - 7.3.1 The current .gitignore handling is intentionally simplified: - non-root .gitignore files cause their entire directory subtree - to be ignored. This will be replaced by real pattern parsing in - a future version. -""" - print(help_text.strip()) - return 0 -- 2.20.1