From: Thomas Walker Lynch Date: Wed, 19 Nov 2025 16:12:54 +0000 (+0000) Subject: introduces the tool X-Git-Url: https://git.reasoningtechnology.com/style/static/git-logo.png?a=commitdiff_plain;h=e6a68813b7548f0e9820f814d002e16bf73c15b8;p=Harmony.git introduces the tool --- diff --git a/tool/skeleton/A_minus_B b/tool/skeleton/A_minus_B deleted file mode 100755 index f6f7bbb..0000000 --- a/tool/skeleton/A_minus_B +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -A_minus_B - CLI test driver for skeleton.tree_dict_A_minus_B(A, B) - -Usage: - A_minus_B -""" - -from __future__ import annotations - -import os -import sys -from typing import Sequence - -import meta -import skeleton - - -def CLI(argv: Sequence[str] | None = None) -> int: - if argv is None: - argv = sys.argv[1:] - - prog = os.path.basename(sys.argv[0]) if sys.argv else "A_minus_B" - - if len(argv) != 2 or argv[0] in ("-h", "--help"): - print(f"Usage: {prog} ") - return 1 - - A_root = argv[0] - B_root = argv[1] - - if not os.path.isdir(A_root): - print(f"{prog}: {A_root}: not a directory") - return 2 - - if not os.path.isdir(B_root): - print(f"{prog}: {B_root}: not a directory") - return 3 - - A = skeleton.tree_dict_make(A_root, None) - B = skeleton.tree_dict_make(B_root, None) - - meta.debug_set("tree_dict_A_minus_B") - - _result = skeleton.tree_dict_A_minus_B(A, B) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(CLI()) diff --git a/tool/skeleton/CLI.py b/tool/skeleton/CLI.py deleted file mode 100755 index 46e810b..0000000 --- a/tool/skeleton/CLI.py +++ /dev/null @@ -1,282 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -CLI.py - Harmony skeleton checker - -Grammar (informal): - - check * [] - - :: | | - - :: version | help | usage - :: environment - :: structure | import | export | suspicious | addendum | all - 
-Commands are sorted into three sets: - 1. HELP_COMMANDS - 2. NO_OTHER_COMMANDS - 3. HAS_OTHER_COMMANDS - -At runtime, argv commands are classified into four lists: - 1. help_list - 2. no_other_list - 3. has_other_list - 4. unclassified_list - -If the meta debug set contains the tag "print_command_lists", these four lists -are printed. - -If 'environment' appears in no_other_list, the meta.printenv() helper -is invoked to print the environment. - -For commands we compare: - - A = Harmony skeleton tree_dict - B = project tree_dict (path is the last argv token when any - is present before it). -""" - -from __future__ import annotations - -import os -import sys -from typing import Sequence - -import command -import doc -import Harmony -import meta -import skeleton - -meta.debug_set("print_command_lists") - -# Command tag sets (classification universe) -HELP_COMMANDS: set[str] = set([ - "version" - ,"help" - ,"usage" -]) - -NO_OTHER_COMMANDS: set[str] = set([ - "environment" -]) - -HAS_OTHER_COMMANDS: set[str] = set([ - "structure" - ,"import" - ,"export" - ,"suspicious" - ,"addendum" - ,"all" -]) - - -def command_type(arg: str) -> str: - """ - Classify a single command token. - - Returns: - "Help" if arg is a help command - "NoOther" if arg is a no_other command - "HasOther" if arg is a has_other command - "UnClassified" otherwise - """ - if arg in HELP_COMMANDS: - return "Help" - - if arg in NO_OTHER_COMMANDS: - return "NoOther" - - if arg in HAS_OTHER_COMMANDS: - return "HasOther" - - return "UnClassified" - - -def print_command_lists( - help_list: list[str] - ,no_other_list: list[str] - ,has_other_list: list[str] - ,unclassified_list: list[str] -) -> None: - """ - Print the four classified command lists derived from argv. 
- """ - print("help_list:", help_list) - print("no_other_list:", no_other_list) - print("has_other_list:", has_other_list) - print("unclassified_list:", unclassified_list) - - -def CLI(argv: Sequence[str] | None = None) -> int: - """ - CLI entrypoint. - - Responsibilities: - 1. Accept argv (or sys.argv[1:] by default). - 2. Classify arguments using command_type(), with the last argument - treated specially to avoid aliasing. - 3. Invoke behaviors implied by the commands. - 4. Return integer status code. - - Argument interpretation: - - Let argv = [a0, a1, ..., aN-1]. - - - If N == 0: - no commands; nothing to do. - - - If N >= 1: - * Classify a0..aN-2. - - If any are UnClassified -> error. - - * If any appear in a0..aN-2: - - aN-1 is treated as path (B_root), not classified. - - * If no appear in a0..aN-2: - - Classify aN-1: - - If UnClassified -> error (unknown command). - - If HasOther -> error (other path not specified). - - Else -> added to Help / NoOther lists. - """ - if argv is None: - argv = sys.argv[1:] - - # No arguments: print usage and exit with status 1. - if len(argv) == 0: - doc.print_usage() - return 1 - - # No arguments: nothing to do (could later decide to print usage). - if len(argv) == 0: - return 0 - - # Split into head (all but last) and last argument - head = argv[:-1] - last = argv[-1] - - help_list: list[str] = [] - no_other_list: list[str] = [] - has_other_list: list[str] = [] - unclassified_list: list[str] = [] - - # 1. 
Classify head tokens - for arg in head: - ct = command_type(arg) - - if ct == "Help": - help_list.append(arg) - elif ct == "NoOther": - no_other_list.append(arg) - elif ct == "HasOther": - has_other_list.append(arg) - else: - unclassified_list.append(arg) - - # Any unclassified in the head is an error - if len(unclassified_list) > 0: - first_bad = unclassified_list[0] - print(f"Unrecognized command: {first_bad}") - return 5 - - head_has_other = (len(has_other_list) > 0) - - B_root: str | None = None - - if head_has_other: - # 2A. Any in head -> last arg is always path. - B_root = os.path.abspath(last) - else: - # 2B. No in head -> classify last. - ct = command_type(last) - - if ct == "UnClassified": - print(f"Unrecognized command: {last}") - return 5 - - if ct == "HasOther": - print("Other path not specified for has_other command(s).") - return 6 - - if ct == "Help": - help_list.append(last) - elif ct == "NoOther": - no_other_list.append(last) - # ct cannot be HasOther here due to earlier check. - - if meta.debug_has("print_command_lists"): - print_command_lists( - help_list - ,no_other_list - ,has_other_list - ,unclassified_list - ) - - # Help handling - if len(help_list) > 0: - if "version" in help_list: - meta.version_print() - if "usage" in help_list: - doc.print_usage() - if "help" in help_list: - doc.print_help() - return 1 - - ret_val = 0 - - # No-other commands (environment, etc.) - if "environment" in no_other_list: - env_status = meta.printenv() - if env_status != 0: - ret_val = env_status - - # If we still have no has_other commands, we are done. - # (Example: just "environment", or just "help/usage".) 
- if len(has_other_list) == 0: - return ret_val - - # At this point we know: - # - has_other_list is non-empty - # - B_root must have been set (head_has_other was True) - if B_root is None: - print("Internal error: B_root not set despite has_other commands.") - return 7 - - if not os.path.isdir(B_root): - print(f"Other project path is not a directory: {B_root}") - return 4 - - # Determine Harmony root (A_root) - status, A_root = Harmony.where() - - if status == "not-found": - print("Harmony project not found; normally this command is run from within Harmony.") - return 3 - - if status == "different": - print("Seems we are not running in the Harmony project, will exit.") - return 2 - - # Build tree_dicts for A (Harmony) and B (other project) - A_tree = skeleton.tree_dict_make(A_root, None) - B_tree = skeleton.tree_dict_make(B_root, None) - - # Dispatch the commands - cmd_status = command.dispatch( - has_other_list - ,A_tree - ,B_tree - ,A_root - ,B_root - ) - - if cmd_status != 0: - ret_val = cmd_status - - return ret_val - - -if __name__ == "__main__": - raise SystemExit(CLI()) diff --git a/tool/skeleton/GitIgnore.py b/tool/skeleton/GitIgnore.py deleted file mode 100755 index 70c6509..0000000 --- a/tool/skeleton/GitIgnore.py +++ /dev/null @@ -1,270 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -GitIgnore.py - minimal .gitignore-based helper for Harmony projects - -Behavior: - - 1. During initialization, traverse the project tree rooted at - . - - 2. 
Whenever a directory contains a '.gitignore' file, record: - - its relative directory path from the project root - - a list of regular expressions compiled from the patterns - in that '.gitignore' file - - These are stored in: - - self.rules: Dict[str, List[Pattern]] - - where the key is the directory RELATIVE to the project root: - "" -> project root (top-level .gitignore) - "src" -> src/.gitignore - "src/module" -> src/module/.gitignore - - 3. check() -> token: - - - is a path relative to the project root. - - - We compute all prefix directories of , including the - root (""), for example: - - path = "a/b/c.py" - prefixes = ["", "a", "a/b"] - - - For each prefix, if there are regexes stored for that directory, - we collect them. - - - We then test ALL collected regexes against the basename of - (the last component only). - - - If ANY regex matches, return 'Ignore'. - Otherwise return 'Accept'. - -Notes: - - * We implement a simplified subset of .gitignore semantics suitable - for your current patterns and add a small base ignore set for - always-ignored names such as '.git'. -""" - -from __future__ import annotations - -import fnmatch -import os -import re -from typing import Dict, List -import Harmony - - -class GitIgnore: - """ - GitIgnore(project_path) - - Attributes: - project_path: - Absolute path to the project root. - - rules: - Mapping from relative directory path -> list of compiled regex - patterns derived from that directory's '.gitignore' file. - - Example: - rules[""] -> patterns from /.gitignore - rules["developer"] -> patterns from developer/.gitignore - - base_patterns: - List of compiled regex patterns applied to the basename of every - checked path, independent of any .gitignore file. Currently used - to always ignore '.git' directories. - """ - - def __init__( - self - ,project_path: str - ) -> None: - """ - Initialize a GitIgnore instance with a path to a project and - scan for '.gitignore' files. 
- """ - self.project_path: str = os.path.abspath(project_path) - self.rules: Dict[str, List[re.Pattern]] = {} - - # Base patterns: always applied, regardless of .gitignore contents. - # These are matched against basenames only. - self.base_patterns: List[re.Pattern] = [ - re.compile(r"^\.git$") # ignore any basename == ".git" - ] - - self._scan_project() - - def _scan_project(self) -> None: - """ - Traverse the project tree and populate self.rules with entries of - the form: - - -> [Pattern, Pattern, ...] - - where is the directory containing '.gitignore', relative - to the project root ("" for root). - """ - root = self.project_path - - for dirpath, dirnames, filenames in os.walk(root, topdown=True): - if ".gitignore" not in filenames: - continue - - rel_dir = os.path.relpath(dirpath, root) - if rel_dir == ".": - rel_dir = "" - - gitignore_path = os.path.join(dirpath, ".gitignore") - patterns = self._parse_gitignore_file(gitignore_path) - - if patterns: - if rel_dir not in self.rules: - self.rules[rel_dir] = [] - self.rules[rel_dir].extend(patterns) - - def _parse_gitignore_file( - self - ,gitignore_path: str - ) -> List[re.Pattern]: - """ - Parse a single '.gitignore' file into a list of compiled regex patterns. - - Simplified rules: - - Blank lines and lines starting with '#' are ignored. - - Lines containing '/' in the MIDDLE are currently ignored - (future extension). - - Lines ending with '/' are treated as directory name patterns: - '__pycache__/' -> pattern on basename '__pycache__' - - All patterns are treated as name globs and compiled via - fnmatch.translate(), to be matched against basenames only. - """ - patterns: List[re.Pattern] = [] - - try: - with open(gitignore_path, "r", encoding="utf-8") as f: - for raw_line in f: - line = raw_line.strip() - - # Skip comments and blank lines - if not line or line.startswith("#"): - continue - - # Remove trailing '/' for directory patterns (e.g. 
'__pycache__/') - if line.endswith("/"): - line = line[:-1].strip() - if not line: - continue - - # If there is still a '/' in the line, we do not support this - # pattern in this minimal implementation. - if "/" in line: - continue - - # Compile as a name glob -> regex - regex_text = fnmatch.translate(line) - patterns.append(re.compile(regex_text)) - - except OSError: - # If the .gitignore cannot be read, just skip it. - return patterns - - return patterns - - def check( - self - ,path: str - ) -> str: - """ - Check a path against the collected .gitignore patterns. - - path: - A path relative to the project root. - - Returns: - 'Ignore' if any applicable pattern matches the basename of the path, - otherwise 'Accept'. - """ - # Normalize the incoming path - norm = os.path.normpath(path) - - # If the path is '.' or empty, we accept it - if norm in ("", "."): - return "Accept" - - basename = os.path.basename(norm) - - # First, apply base patterns (always applied). - for pat in self.base_patterns: - if pat.match(basename): - return "Ignore" - - # Build the list of directories that may contribute .gitignore rules. - # - # For path "a/b/c": - # prefixes: ["", "a", "a/b"] - parts = norm.split(os.sep) - - prefixes: List[str] = [""] - prefix = None - for part in parts[:-1]: - if prefix is None: - prefix = part - else: - prefix = os.path.join(prefix, part) - prefixes.append(prefix) - - # Collect all patterns from the applicable .gitignore directories - for rel_dir in prefixes: - dir_patterns = self.rules.get(rel_dir) - if not dir_patterns: - continue - - for pat in dir_patterns: - if pat.match(basename): - return "Ignore" - - return "Accept" - - -def test_GitIgnore() -> int: - """ - 1. Locate the Harmony project root using Harmony.where(). - 2. Create a GitIgnore instance rooted at that path. - 3. 
Print: - - directories that have .gitignore rules - - directories (relative) that would be ignored by check() - """ - status, Harmony_root = Harmony.where() - - if status == "not-found": - print("Harmony project not found; cannot test GitIgnore.") - return 1 - - if status == "different": - print("Warning: Harmony not found, using nearest .git directory for GitIgnore test.") - - gi = GitIgnore(Harmony_root) - - print(".gitignore rule directories (relative to Harmony root):") - for rel_dir in sorted(gi.rules.keys()): - print(f" {rel_dir if rel_dir else '.'}") - - print("\nDirectories that would be ignored (relative to Harmony root):") - for dirpath, dirnames, filenames in os.walk(Harmony_root, topdown=True): - rel_dir = os.path.relpath(dirpath, Harmony_root) - if rel_dir == ".": - rel_dir = "" - - if gi.check(rel_dir) == "Ignore": - print(f" {rel_dir if rel_dir else '.'}") - - return 0 - - -if __name__ == "__main__": - raise SystemExit(test_GitIgnore()) diff --git a/tool/skeleton/Harmony.py b/tool/skeleton/Harmony.py deleted file mode 100644 index 9385507..0000000 --- a/tool/skeleton/Harmony.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -locate the project root -""" - -from __future__ import annotations - -import meta -import os -import sys -from typing import Any, Callable, Dict - -# where -# -# Context / assumptions: -# 1. This module lives somewhere under the Harmony tree, for example: -# /.../Harmony/tool/skeleton/skeleton.py -# 2. CLI.py is run from somewhere inside the same tree (or a clone). -# -# Search behavior: -# 1. Start from the directory containing this file. -# 2. Walk upward towards the filesystem root, with limits: -# a) Do not move up more than 5 levels. -# b) Stop immediately if the current directory contains a -# '.git' subdirectory. 
-# -# Result classification: -# status is one of: -# 'found' -> we found a directory whose basename is 'Harmony' -# 'different' -> we stopped at a directory that has a '.git' -# subdirectory, but its basename is not 'Harmony' -# 'not-found' -> we hit the 5-level limit or filesystem root -# without finding 'Harmony' or a '.git' directory -# -# Path: -# - In all cases, the returned path is the last directory inspected: -# * the 'Harmony' directory (status 'found'), or -# * the directory with '.git' (status 'different'), or -# * the directory at the 5-level limit / filesystem root -# (status 'not-found'). -# -# Debug printing: -# - If meta.debug_has("print_Harmony_root") is true, print: -# * "The Harmony project root found at: {path}" -# when status == 'found' -# * "Harmony not found, but found: {path}" -# when status == 'different' -# * "Harmony not found." -# when status == 'not-found' -def where() -> tuple[str, str]: - """ - Locate the Harmony root (or best guess). - - Returns: - (status, path) - """ - here = os.path.abspath(__file__) - d = os.path.dirname(here) - - harmony_root = None - status = "not-found" - - max_up = 5 - steps = 0 - - while True: - base = os.path.basename(d) - - # Case 1: exact 'Harmony' directory name - if base == "Harmony": - harmony_root = d - status = "found" - break - - # Case 2: stop at a directory that has a '.git' subdirectory - git_dir = os.path.join(d, ".git") - if os.path.isdir(git_dir): - harmony_root = d - if base == "Harmony": - status = "found" - else: - status = "different" - break - - parent = os.path.dirname(d) - - # Stop if we hit filesystem root - if parent == d: - harmony_root = d - status = "not-found" - break - - steps += 1 - if steps > max_up: - # Reached search depth limit; last inspected directory is d - harmony_root = d - status = "not-found" - break - - d = parent - - if harmony_root is None: - # Extremely defensive; in practice harmony_root will be set above. 
- harmony_root = d - - root_base = os.path.basename(harmony_root) - - # Warning to stderr if we are not literally in a 'Harmony' directory - if root_base != "Harmony": - sys.stderr.write( - f"WARNING: Harmony root basename is '{root_base}', expected 'Harmony'.\n" - ) - - if meta.debug_has("print_Harmony_root"): - if status == "found": - print(f"The Harmony project root found at: {harmony_root}") - elif status == "different": - print(f"Harmony not found, but found: {harmony_root}") - else: - print("Harmony not found.") - - return status, harmony_root - -def test_where() -> int: - """ - Simple test that prints the Harmony root using the debug flag. - """ - meta.debug_set("print_Harmony_root") - status, _root = where() - return 0 if status != "not-found" else 1 - diff --git a/tool/skeleton/Harmony_where b/tool/skeleton/Harmony_where deleted file mode 100755 index 9d39f1e..0000000 --- a/tool/skeleton/Harmony_where +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -Harmony_where - CLI to locate the Harmony project root - -Usage: - Harmony_where - -Prints the status and path returned by Harmony.where(). 
-""" - -from __future__ import annotations - -import sys - -import Harmony - - -def CLI(argv=None) -> int: - # Ignore argv; no arguments expected - status, Harmony_root = Harmony.where() - - if status == "found": - print(f"Harmony project root found at: {Harmony_root}") - return 0 - - if status == "different": - print(f"Harmony not found, but nearest .git directory is: {Harmony_root}") - return 1 - - print("Harmony project root not found.") - return 2 - - -if __name__ == "__main__": - raise SystemExit(CLI()) diff --git a/tool/skeleton/check b/tool/skeleton/check deleted file mode 120000 index 45a8ec1..0000000 --- a/tool/skeleton/check +++ /dev/null @@ -1 +0,0 @@ -CLI.py \ No newline at end of file diff --git a/tool/skeleton/command.py b/tool/skeleton/command.py deleted file mode 100644 index 155340a..0000000 --- a/tool/skeleton/command.py +++ /dev/null @@ -1,508 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -command.py - high-level dispatch for Harmony check commands - -Commands (semantics): - - structure: - - Differences in directory structure: directories present in A but - not present as directories in B. - - import: - - Shell copy commands to copy: - * in-between nodes in B that are newer than A (same relative path), or - * in-between nodes in B that do not exist in A at all. - Direction: B -> A - Also emits: - * a mkdir list (directories to create in A) - * an "other" list for type mismatches / non-file/dir nodes. - - export: - - Shell copy commands to copy: - * nodes in A that are newer than B, and - * nodes in A that do not exist in B. - Direction: A -> B - Also emits: - * a mkdir list (directories to create in B) - * an "other" list for type mismatches / non-file/dir nodes. - - suspicious: - - Nodes in B that fall "in between" the Harmony skeleton topology: - under some A directory, but not under any A leaf directory. 
- (tree_dict_in_between_and_below(A,B).in_between) - - addendum: - - Nodes in B that fall "below" Harmony leaf directories: - added work in appropriate extension locations. - (tree_dict_in_between_and_below(A,B).below) - - all: - - Runs structure, import, export, suspicious, and addendum. -""" - -from __future__ import annotations - -import os -from typing import Any, Dict, List, Tuple - -import skeleton - -TreeDict = Dict[str, Dict[str, Any]] - - -def shell_quote( - s: str -) -> str: - """ - Minimal single-quote shell quoting. - """ - return "'" + s.replace("'", "'\"'\"'") + "'" - - -def _print_header( - title: str -) -> None: - print() - print(f"== {title} ==") - - -# ---------------------------------------------------------------------- -# structure: directories in A that are missing / non-directories in B -# ---------------------------------------------------------------------- -def cmd_structure( - A: TreeDict - ,B: TreeDict -) -> int: - """ - structure: differences in directory structure, directories in A - B. - - We include any path where: - - A[path].node_type == 'directory', and - - either path not in B, or B[path].node_type != 'directory'. 
- """ - structural: TreeDict = {} - - for path, info_A in A.items(): - if info_A.get("node_type") != "directory": - continue - - info_B = B.get(path) - if info_B is None or info_B.get("node_type") != "directory": - structural[path] = info_A - - if not structural: - _print_header("structure") - print("No structural directory differences (A - B).") - return 0 - - _print_header("structure: directories in A not in B") - skeleton.tree_dict_print(structural) - return 0 - - -# ---------------------------------------------------------------------- -# import: B -> A (mkdir, cp, and "other" list), using in_between_newer -# ---------------------------------------------------------------------- -def build_import_commands( - A_tree: TreeDict - ,B_tree: TreeDict - ,A_root: str - ,B_root: str -) -> Tuple[List[str], List[str], List[str]]: - """ - Compute shell commands to update A from B. - - Returns: - (mkdir_cmds, cp_cmds, other_list) - - Semantics: - - mkdir_cmds: - - Directories that are directories in B, but are missing in A. - - We DO NOT auto-resolve type mismatches (e.g. B=directory, - A=file); those go into other_list. - - cp_cmds: - - Files where: - * the path does not exist in A, OR - * the node in A is not a file, OR - * the B copy is newer than A (mtime comparison). - - However, if A has a non-file at that path, we treat it as a - type mismatch and add that path to other_list instead of - emitting a cp command. - - other_list: - - Human-readable notes for: - * type mismatches between A and B, and - * nodes in B that are neither 'file' nor 'directory'. - """ - mkdir_cmds: List[str] = [] - cp_cmds: List[str] = [] - other_list: List[str] = [] - - for rel_path, b_info in B_tree.items(): - b_type = b_info.get("node_type") - rel_display = rel_path if rel_path else "." 
- - a_info = A_tree.get(rel_path) - a_type = a_info.get("node_type") if a_info is not None else "MISSING" - - # Case 1: B node is neither file nor directory -> other_list - if b_type not in ("file", "directory"): - other_list.append( - f"{rel_display}: A={a_type}, B={b_type}" - ) - continue - - # Case 2: B directory - if b_type == "directory": - if a_info is None: - # Missing in A: safe to mkdir -p - target_dir = os.path.join(A_root, rel_path) if rel_path else A_root - mkdir_cmds.append(f"mkdir -p {shell_quote(target_dir)}") - else: - # Exists in A: must also be a directory to be "structurally OK" - if a_type != "directory": - # Type mismatch: do not mkdir, just report - other_list.append( - f"{rel_display}: A={a_type}, B=directory" - ) - continue - - # Case 3: B file - # Decide whether to copy B -> A, or report conflict. - if a_info is None: - # B-only file - src = os.path.join(B_root, rel_path) if rel_path else B_root - dst = A_root - cp_cmds.append( - f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" - ) - continue - - # A has something at this path - if a_type != "file": - # Type mismatch (e.g. A=directory, B=file, or A=other) - other_list.append( - f"{rel_display}: A={a_type}, B=file" - ) - continue - - # Both files: compare mtime - a_mtime = a_info.get("mtime") - b_mtime = b_info.get("mtime") - - if isinstance(a_mtime, (int, float)) and isinstance(b_mtime, (int, float)): - if b_mtime > a_mtime: - src = os.path.join(B_root, rel_path) if rel_path else B_root - dst = A_root - cp_cmds.append( - f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" - ) - - return mkdir_cmds, cp_cmds, other_list - - -def cmd_import( - A_tree: TreeDict - ,B_tree: TreeDict - ,A_root: str - ,B_root: str -) -> int: - """ - import: update the skeleton (A) from the project (B), - using only in_between_newer nodes. 
- """ - inb_newer = skeleton.in_between_newer(A_tree, B_tree) - - mkdir_cmds, cp_cmds, other_list = build_import_commands( - A_tree - ,inb_newer - ,A_root - ,B_root - ) - - print("== import: copy from B -> A (in-between newer only) ==") - print(f"# A root: {A_root}") - print(f"# B root: {B_root}") - print("# Only considering in-between files that are new or absent in A.") - print("#") - - print("# Directories to create in A (mkdir -p):") - if mkdir_cmds: - for line in mkdir_cmds: - print(line) - else: - print("# (none)") - print("#") - - print("# Files to copy from B -> A (cp --parents -a):") - if cp_cmds: - for line in cp_cmds: - print(line) - else: - print("# (none)") - print("#") - - print("# Nodes NOT handled automatically (type mismatches / non-file/dir):") - if other_list: - for rel in other_list: - print(f"# {rel}") - else: - print("# (none)") - - return 0 - - -# ---------------------------------------------------------------------- -# export: A -> B (mkdir, cp, and "other" list) -# ---------------------------------------------------------------------- -def build_export_commands( - A_tree: TreeDict - ,B_tree: TreeDict - ,A_root: str - ,B_root: str -) -> Tuple[List[str], List[str], List[str]]: - """ - Compute shell commands to update B from A. - - Returns: - (mkdir_cmds, cp_cmds, other_list) - - Semantics: - - mkdir_cmds: - - Directories that are directories in A, but are missing in B. - - Type mismatches go into other_list. - - cp_cmds: - - Files where: - * the path does not exist in B, OR - * the node in B is not a file, OR - * the A copy is newer than B (mtime comparison). - - If B has a non-file while A has a file, treat as type mismatch. - - other_list: - - Human-readable notes for: - * type mismatches between A and B, and - * nodes in A that are neither 'file' nor 'directory'. 
- """ - mkdir_cmds: List[str] = [] - cp_cmds: List[str] = [] - other_list: List[str] = [] - - for rel_path, a_info in A_tree.items(): - a_type = a_info.get("node_type") - rel_display = rel_path if rel_path else "." - - b_info = B_tree.get(rel_path) - b_type = b_info.get("node_type") if b_info is not None else "MISSING" - - # Case 1: A node is neither file nor directory -> other_list - if a_type not in ("file", "directory"): - other_list.append( - f"{rel_display}: A={a_type}, B={b_type}" - ) - continue - - # Case 2: A directory - if a_type == "directory": - if b_info is None: - # Missing in B: safe to mkdir -p - target_dir = os.path.join(B_root, rel_path) if rel_path else B_root - mkdir_cmds.append(f"mkdir -p {shell_quote(target_dir)}") - else: - # Exists in B: must also be directory - if b_type != "directory": - other_list.append( - f"{rel_display}: A=directory, B={b_type}" - ) - continue - - # Case 3: A file - if b_info is None: - # A-only file - src = os.path.join(A_root, rel_path) if rel_path else A_root - dst = B_root - cp_cmds.append( - f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" - ) - continue - - if b_type != "file": - other_list.append( - f"{rel_display}: A=file, B={b_type}" - ) - continue - - # Both files: compare mtime - a_mtime = a_info.get("mtime") - b_mtime = b_info.get("mtime") - - if isinstance(a_mtime, (int, float)) and isinstance(b_mtime, (int, float)): - if a_mtime > b_mtime: - src = os.path.join(A_root, rel_path) if rel_path else A_root - dst = B_root - cp_cmds.append( - f"cp --parents -a {shell_quote(src)} {shell_quote(dst)}/" - ) - - return mkdir_cmds, cp_cmds, other_list - - -def cmd_export( - A_tree: TreeDict - ,B_tree: TreeDict - ,A_root: str - ,B_root: str -) -> int: - """ - export: show directory creation and copy commands A -> B. 
- """ - mkdir_cmds, cp_cmds, other_list = build_export_commands( - A_tree - ,B_tree - ,A_root - ,B_root - ) - - print("== export: copy from A -> B ==") - print(f"# A root: {A_root}") - print(f"# B root: {B_root}") - print("#") - - print("# Directories to create in B (mkdir -p):") - if mkdir_cmds: - for line in mkdir_cmds: - print(line) - else: - print("# (none)") - print("#") - - print("# Files to copy from A -> B (cp --parents -a):") - if cp_cmds: - for line in cp_cmds: - print(line) - else: - print("# (none)") - print("#") - - print("# Nodes NOT handled automatically (type mismatches / non-file/dir):") - if other_list: - for rel in other_list: - print(f"# {rel}") - else: - print("# (none)") - - return 0 - - -# ---------------------------------------------------------------------- -# suspicious / addendum via in_between_and_below -# ---------------------------------------------------------------------- -def cmd_suspicious( - A: TreeDict - ,B: TreeDict -) -> int: - """ - suspicious: nodes in B that fall 'in between' the Harmony skeleton, - not under leaf directories. - """ - in_between, _below = skeleton.tree_dict_in_between_and_below(A, B) - - _print_header("suspicious: nodes in-between Harmony leaves") - - if not in_between: - print("No suspicious nodes found in B (relative to A).") - return 0 - - skeleton.tree_dict_print(in_between) - return 0 - - -def cmd_addendum( - A: TreeDict - ,B: TreeDict -) -> int: - """ - addendum: nodes in B that fall 'below' Harmony leaf directories. 
- """ - _in_between, below = skeleton.tree_dict_in_between_and_below(A, B) - - _print_header("addendum: nodes added under Harmony leaves") - - if not below: - print("No addendum nodes found in B (relative to A).") - return 0 - - skeleton.tree_dict_print(below) - return 0 - - -# ---------------------------------------------------------------------- -# Top-level dispatcher -# ---------------------------------------------------------------------- -def dispatch( - has_other_list: List[str] - ,A: TreeDict - ,B: TreeDict - ,A_root: str - ,B_root: str -) -> int: - """ - Dispatch commands. - - has_other_list: - List of command tokens (subset of: - 'structure', 'import', 'export', 'suspicious', 'addendum', 'all'). - - A, B: - tree_dicts for Harmony skeleton (A) and project (B). - - A_root, B_root: - Root paths corresponding to A and B (for copy commands). - """ - cmds = set(has_other_list) - - if "all" in cmds: - cmds.update([ - "structure" - ,"import" - ,"export" - ,"suspicious" - ,"addendum" - ]) - - ordered = [ - "structure" - ,"import" - ,"export" - ,"suspicious" - ,"addendum" - ] - - status = 0 - - for name in ordered: - if name not in cmds: - continue - - if name == "structure": - rc = cmd_structure(A, B) - elif name == "import": - rc = cmd_import(A, B, A_root, B_root) - elif name == "export": - rc = cmd_export(A, B, A_root, B_root) - elif name == "suspicious": - rc = cmd_suspicious(A, B) - elif name == "addendum": - rc = cmd_addendum(A, B) - else: - rc = 0 - - if rc != 0: - status = rc - - return status diff --git a/tool/skeleton/doc.py b/tool/skeleton/doc.py deleted file mode 100644 index a8d5351..0000000 --- a/tool/skeleton/doc.py +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -doc.py - usage and help text for the Harmony 'check' tool - -Grammar (informal): - - * [] - - :: | | - - :: version | help | usage - :: environment - :: structure | import | export | 
suspicious | addendum | all -""" - -from __future__ import annotations - -import meta -import os -import sys -from typing import TextIO - - -def prog_name() -> str: - """ - Return the program name as invoked by the user. - - Typically: - - basename(sys.argv[0]) when running from the shell. - - Falls back to 'check' if argv[0] is empty. - """ - raw = sys.argv[0] if sys.argv and sys.argv[0] else "check" - base = os.path.basename(raw) or raw - return base - - -def _usage_text(prog: str) -> str: - return f"""\ -Usage: - {prog} * [] - -Where: - :: | | - - :: version | help | usage - :: environment - :: structure | import | export | suspicious | addendum | all -""" - - -def _help_text(prog: str) -> str: - return f"""\ -{prog} - Harmony skeleton integrity and metadata checker - -Syntax: - {prog} * [] - -Where: - :: path - :: | | - - :: version | help | usage - :: environment - :: structure | import | export | suspicious | addendum | all - -Argument rules (informal): - 1. commands are processed first, and then the program returns. - Hence if any help commands are present, the remaining commands - are ignored. - - 2. We assume {prog} is run within the Harmony skeleton, or a skeleton - derived from it. This is the 'default skeleton', or more simply, 'A'. - - 3. The path is the directory of a project that is assumed to - be built upon the default skeleton. This second project root is - referred to as 'B'. - - 4. If none of the commands require an path argument, then it - should not be given. Otherwise it is required. A command that - requires an path argument is called a command. - - 5. Implementation detail: all arguments except the last are first - treated as commands. If any of those are , the last - argument is interpreted as the path. If no - appears before the last argument, the last argument is treated as - another command. - -Roots: - A = Skeleton project root (auto-detected). Currently this is the - Harmony skeleton, but {prog} is not limited to Harmony. 
- - B = project root (path argument when required). - -{prog} is used to ask questions about how has changed relative -to the current default skeleton. Changes may come from edits to the -skeleton itself, edits to skeleton files in , or files and -directories added to . Stated briefly, {prog} compares A with B. -Conceptually, A and B are any two non-overlapping directory trees. - -Command semantics: - structure - - Report directory-structure differences: - directories present in A that are missing in B or not directories - in B. - - Output: a table of such directories. - - import - - Suggest shell copy commands to update A from B: - * files in B that are newer than A at the same relative path - * files that exist in B but not in A - - Direction: B -> A - - Output: 'cp --parents -a' commands (to be reviewed/edited before use). - - export - - Suggest shell copy commands to update B from A: - * files where the A copy is newer than B at the same path - * files that exist in A but not in B - - Direction: A -> B - - Output: 'cp --parents -a' commands (to be reviewed/edited before use). - - suspicious - - Report nodes in B that lie "in between" the Harmony skeleton: - under a directory present in A, but not under any leaf directory - in A. - - Intended to highlight questionable placements that may indicate - misuse of the skeleton or candidates for new skeleton structure. - - addendum - - Report nodes in B that lie "below" Harmony leaf directories: - work added in the intended extension points (tools, tests, etc.). - - Intended to show project-specific additions made in proper places. - - all - - Run: structure, import, export, suspicious, addendum (in that order). - -Notes: - - Directory and file listings respect a simplified .gitignore model - plus some always-ignored patterns (such as '.git' directories). - - Timestamps are formatted via the Z helper in UTC (ISO 8601). 
-""" - -def print_usage( - stream: TextIO | None = None -) -> None: - """ - Print the usage text to the given stream (default: sys.stdout), - using the actual program name as invoked. - """ - if stream is None: - stream = sys.stdout - - text = _usage_text(prog_name()) - stream.write(text) - if not text.endswith("\n"): - stream.write("\n") - - -def print_help( - stream: TextIO | None = None -) -> None: - """ - Print the help text to the given stream (default: sys.stdout), - using the actual program name as invoked. - """ - if stream is None: - stream = sys.stdout - - utext = _usage_text(prog_name()) - htext = _help_text(prog_name()) - - stream.write(utext) - if not utext.endswith("\n"): - stream.write("\n") - - stream.write("\n") - stream.write(htext) - if not htext.endswith("\n"): - stream.write("\n") diff --git a/tool/skeleton/in_between_and_below b/tool/skeleton/in_between_and_below deleted file mode 100755 index 2993767..0000000 --- a/tool/skeleton/in_between_and_below +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -in_between_and_below - CLI test driver for skeleton.tree_dict_in_between_and_below(A, B) - -Usage: - in_between_and_below -""" - -from __future__ import annotations - -import os -import sys -from typing import Sequence - -import meta -import skeleton - - -def CLI(argv: Sequence[str] | None = None) -> int: - if argv is None: - argv = sys.argv[1:] - - prog = os.path.basename(sys.argv[0]) if sys.argv else "in_between_and_below" - - if len(argv) != 2 or argv[0] in ("-h", "--help"): - print(f"Usage: {prog} ") - return 1 - - A_root = argv[0] - B_root = argv[1] - - if not os.path.isdir(A_root): - print(f"{prog}: {A_root}: not a directory") - return 2 - - if not os.path.isdir(B_root): - print(f"{prog}: {B_root}: not a directory") - return 3 - - A = skeleton.tree_dict_make(A_root, None) - B = skeleton.tree_dict_make(B_root, None) - - 
meta.debug_set("tree_dict_in_between_and_below") - - _result = skeleton.tree_dict_in_between_and_below(A, B) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(CLI()) diff --git a/tool/skeleton/load_command_module.py b/tool/skeleton/load_command_module.py deleted file mode 100644 index 226b6dd..0000000 --- a/tool/skeleton/load_command_module.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -load_command_module.py - locate and import Python command modules from $PATH - -Behavior: - 1. Search $PATH for an executable with the given command name. - 2. Prefer a path containing '/incommon/'. - 3. If only /usr/bin/ is found, raise an error saying we were - looking for the incommon version. - 4. Import the chosen script as a Python module, even if it has no .py - extension, by forcing a SourceFileLoader. -""" - -from __future__ import annotations - -import importlib.util -import os -from importlib.machinery import SourceFileLoader -from types import ModuleType -from typing import List - - -def _find_command_candidates(command_name: str) -> List[str]: - """ - Return a list of absolute paths to executables named `command_name` - found on $PATH. - """ - paths: list[str] = [] - - path_env = os.environ.get("PATH", "") - for dir_path in path_env.split(os.pathsep): - if not dir_path: - continue - candidate = os.path.join(dir_path, command_name) - if os.path.isfile(candidate) and os.access(candidate, os.X_OK): - paths.append(os.path.realpath(candidate)) - - return paths - - -def load_command_module(command_name: str) -> ModuleType: - """ - Locate an executable named `command_name` on $PATH and load it - as a Python module. - - Selection policy: - 1. Prefer any path containing '/incommon/'. - 2. If only /usr/bin/ candidates exist, raise an error - saying we were looking for the incommon version. - 3. If no candidate is found, raise an error. 
- - Implementation detail: - Because the incommon command may lack a .py suffix, we explicitly - construct a SourceFileLoader rather than relying on the default - extension-based loader resolution. - """ - candidates = _find_command_candidates(command_name) - - incommon_candidates = [ - p - for p in candidates - if "/incommon/" in p - ] - - usrbin_candidates = [ - p - for p in candidates - if p.startswith("/usr/bin/") - ] - - if incommon_candidates: - target = incommon_candidates[0] - elif usrbin_candidates: - raise RuntimeError( - f"Found /usr/bin/{command_name}, but expected the incommon Python " - f"{command_name} module on PATH." - ) - else: - raise RuntimeError( - f"Could not find an incommon '{command_name}' module on PATH." - ) - - module_name = f"rt_incommon_{command_name}" - - loader = SourceFileLoader( - module_name - ,target - ) - spec = importlib.util.spec_from_loader( - module_name - ,loader - ) - if spec is None: - raise RuntimeError(f"Failed to create spec for {command_name} from {target}") - - module = importlib.util.module_from_spec(spec) - # spec.loader is the SourceFileLoader we just created - assert spec.loader is not None - spec.loader.exec_module(module) - - return module diff --git a/tool/skeleton/make_Harmony_tree_dict b/tool/skeleton/make_Harmony_tree_dict deleted file mode 100755 index 2ed3cea..0000000 --- a/tool/skeleton/make_Harmony_tree_dict +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -skeleton_test - build and print the Harmony tree_dict - -Usage: - skeleton_test - -Behavior: - 1. Locate the Harmony project root via Harmony.where(). - 2. Enable 'tree_dict_print' debug flag. - 3. Call skeleton.tree_dict_make(Harmony_root, None). - -The skeleton.tree_dict_make() function is expected to call -tree_dict_print() when the 'tree_dict_print' debug flag is set. 
-""" - -from __future__ import annotations - -import sys - -import Harmony -import meta -import skeleton - - -def CLI(argv=None) -> int: - # No arguments expected - status, Harmony_root = Harmony.where() - - if status == "not-found": - print("Harmony project not found; cannot build tree_dict.") - return 1 - - if status == "different": - print("Warning: Harmony not found, using nearest .git directory for tree_dict.") - - # Enable printing inside tree_dict_make - meta.debug_set("tree_dict_print") - - _tree = skeleton.tree_dict_make(Harmony_root, None) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(CLI()) diff --git a/tool/skeleton/meta.py b/tool/skeleton/meta.py deleted file mode 100644 index fc014f6..0000000 --- a/tool/skeleton/meta.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -meta.py - thin wrappers around command modules - -Current responsibilities: - 1. Load the incommon 'printenv' command module (no .py extension) - using load_command_module.load_command_module(). - 2. Expose printenv() here, calling the imported printenv() work - function with default arguments (equivalent to running without - any CLI arguments). - 3. Provide a simple version printer for this meta module. - 4. Provide a small debug tag API (set/clear/has). -""" - -from __future__ import annotations - -import datetime -from load_command_module import load_command_module - - -# Load the incommon printenv module once at import time -_PRINTENV_MODULE = load_command_module("printenv") -_Z_MODULE = load_command_module("Z") - - -# Meta module version -_major = 1 -_minor = 5 -def version_print() -> None: - """ - Print the meta module version as MAJOR.MINOR. - """ - print(f"{_major}.{_minor}") - - -# Debug tag set and helpers -_debug = set([ -]) - - -def debug_set(tag: str) -> None: - """ - Add a debug tag to the meta debug set. 
- """ - _debug.add(tag) - - -def debug_clear(tag: str) -> None: - """ - Remove a debug tag from the meta debug set, if present. - """ - _debug.discard(tag) - - -def debug_has(tag: str) -> bool: - """ - Return True if the given debug tag is present. - """ - return tag in _debug - - -# Touch the default tag once so static checkers do not complain about -# unused helpers when imported purely for side-effects. -debug_has("Command") - - -def printenv() -> int: - """ - Call the imported printenv() work function with default arguments: - - no null termination - - no newline quoting - - no specific names (print full environment) - - prog name 'printenv' - """ - return _PRINTENV_MODULE.printenv( - False # null_terminate - ,False # quote_newlines - ,[] # names - ,"printenv" - ) - - -def z_format_mtime( - mtime: float -) -> str: - """ - Format a POSIX mtime (seconds since epoch, UTC) using the Z module. - - Uses Z.ISO8601_FORMAT and Z.make_timestamp(dt=...). - """ - dt = datetime.datetime.fromtimestamp(mtime, datetime.timezone.utc) - return _Z_MODULE.make_timestamp( - fmt=_Z_MODULE.ISO8601_FORMAT - ,dt=dt - ) diff --git a/tool/skeleton/newer b/tool/skeleton/newer deleted file mode 100755 index 30aa373..0000000 --- a/tool/skeleton/newer +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -newer - CLI test driver for skeleton.tree_dict_newer(A, B) - -Usage: - newer -""" - -from __future__ import annotations - -import os -import sys -from typing import Sequence - -import meta -import skeleton - - -def CLI(argv: Sequence[str] | None = None) -> int: - if argv is None: - argv = sys.argv[1:] - - prog = os.path.basename(sys.argv[0]) if sys.argv else "newer" - - if len(argv) != 2 or argv[0] in ("-h", "--help"): - print(f"Usage: {prog} ") - return 1 - - A_root = argv[0] - B_root = argv[1] - - if not os.path.isdir(A_root): - print(f"{prog}: {A_root}: not a directory") - return 2 - - if not 
os.path.isdir(B_root): - print(f"{prog}: {B_root}: not a directory") - return 3 - - A = skeleton.tree_dict_make(A_root, None) - B = skeleton.tree_dict_make(B_root, None) - - meta.debug_set("tree_dict_newer") - - _result = skeleton.tree_dict_newer(A, B) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(CLI()) diff --git a/tool/skeleton/older b/tool/skeleton/older deleted file mode 100755 index f8ff24d..0000000 --- a/tool/skeleton/older +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -older - CLI test driver for skeleton.tree_dict_older(A, B) - -Usage: - older -""" - -from __future__ import annotations - -import os -import sys -from typing import Sequence - -import meta -import skeleton - - -def CLI(argv: Sequence[str] | None = None) -> int: - if argv is None: - argv = sys.argv[1:] - - prog = os.path.basename(sys.argv[0]) if sys.argv else "older" - - if len(argv) != 2 or argv[0] in ("-h", "--help"): - print(f"Usage: {prog} ") - return 1 - - A_root = argv[0] - B_root = argv[1] - - if not os.path.isdir(A_root): - print(f"{prog}: {A_root}: not a directory") - return 2 - - if not os.path.isdir(B_root): - print(f"{prog}: {B_root}: not a directory") - return 3 - - A = skeleton.tree_dict_make(A_root, None) - B = skeleton.tree_dict_make(B_root, None) - - meta.debug_set("tree_dict_older") - - _result = skeleton.tree_dict_older(A, B) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(CLI()) diff --git a/tool/skeleton/skeleton.py b/tool/skeleton/skeleton.py deleted file mode 100644 index b8ffa70..0000000 --- a/tool/skeleton/skeleton.py +++ /dev/null @@ -1,546 +0,0 @@ -#!/usr/bin/env python3 -# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- - -""" -skeleton.py - helpers for working with the Harmony skeleton tree -""" - -from __future__ import annotations - -import os -import sys -from typing import Any, Callable, 
Dict, List, Set - -import meta -from GitIgnore import GitIgnore -import Harmony - -TreeDict = Dict[str, Dict[str, Any]] - -# tree_dict_make / tree_dict_print -# -# Build a dictionary describing a project tree, respecting GitIgnore. -# -# tree_dict_make(, ) -> tree_dict -# -# () -> bignum | None -# -# Keys of tree_dict: -# - Relative paths from ; the root itself is stored under "". -# -# Values are dicts with: -# 1. 'mtime' : last modification time (float seconds) -# 2. 'node_type' : 'file', 'directory', or 'other' -# 3. 'dir_info' : 'NA', 'leaf', 'branch', or 'root' -# 4. 'checksum' : present only for file nodes when checksum_fn is -# not None -# -# Traversal: -# - Any path (directory or file) for which GitIgnore.check() -# returns 'Ignore' is omitted from the tree_dict. -TreeDict = Dict[str, Dict[str, Any]] - -# tree_dict_make / tree_dict_print -# -# Build a dictionary describing a project tree, respecting GitIgnore. -# -# tree_dict_make(, ) -> tree_dict -# -# () -> bignum | None -# -# Keys of tree_dict: -# - Relative paths from ; the root itself is stored under "". -# -# Values are dicts with: -# 1. 'mtime' : last modification time (float seconds) or None -# 2. 'node_type' : 'file', 'directory', 'other', or 'constrained' -# 3. 'dir_info' : 'NA', 'leaf', 'branch', 'root' -# 4. 'checksum' : present only for file nodes when checksum_fn is -# not None -# -# Traversal: -# - Directories whose relative path GitIgnore.check() marks as -# 'Ignore' are included in tree_dict but not traversed further. -def tree_dict_make( - path: str - ,checksum_fn: Callable[[str], int] | None -) -> Dict[str, Dict[str, Any]]: - """ - Build a tree_dict for the subtree rooted at , respecting GitIgnore. - - Semantics (current): - * Any path (directory or file) for which GitIgnore.check() - returns 'Ignore' is completely omitted from the tree_dict. - * The root directory ('') is always included. 
- * Directory dir_info: - - 'root' for the root - - 'branch' for directories that have child directories - (after GitIgnore filtering) - - 'leaf' for directories with no child directories - * Non-directory dir_info: - - 'NA' - * Symlinks are classified as file/directory/other based on what - they point to, if accessible. - * If any filesystem access needed for classification/mtime raises, - the node is recorded as node_type='constrained', dir_info='NA', - mtime=None, and we do not attempt checksum. - """ - root = os.path.abspath(path) - gi = GitIgnore(root) - - tree_dict: Dict[str, Dict[str, Any]] = {} - - for dirpath, dirnames, filenames in os.walk(root, topdown=True): - rel_dir = os.path.relpath(dirpath, root) - if rel_dir == ".": - rel_dir = "" - - # Skip ignored directories (except the root). - if rel_dir != "" and gi.check(rel_dir) == "Ignore": - dirnames[:] = [] - continue - - # Filter child directories by GitIgnore so dir_info reflects - # only directories we will actually traverse. - kept_dirnames: List[str] = [] - for dn in list(dirnames): - child_rel = dn if rel_dir == "" else os.path.join(rel_dir, dn) - if gi.check(child_rel) == "Ignore": - dirnames.remove(dn) - else: - kept_dirnames.append(dn) - - # Record the directory node itself - dir_abs = dirpath - try: - dir_mtime = os.path.getmtime(dir_abs) - dir_node_type = "directory" - if rel_dir == "": - dir_info = "root" - elif kept_dirnames: - dir_info = "branch" - else: - dir_info = "leaf" - except OSError: - # Could not stat the directory: treat as constrained. 
- dir_mtime = None - dir_node_type = "constrained" - dir_info = "NA" - - tree_dict[rel_dir] = { - "mtime": dir_mtime - ,"node_type": dir_node_type - ,"dir_info": dir_info - } - - # For non-ignored directories, record files within - for name in filenames: - abs_path = os.path.join(dirpath, name) - if rel_dir == "": - rel_path = name - else: - rel_path = os.path.join(rel_dir, name) - - if gi.check(rel_path) == "Ignore": - continue - - # Wrap classification + mtime in one try/except so any failure - # marks the node as constrained. - try: - if os.path.islink(abs_path): - # Symlink: classify by target if possible - if os.path.isdir(abs_path): - node_type = "directory" - dir_info_f = "branch" - elif os.path.isfile(abs_path): - node_type = "file" - dir_info_f = "NA" - else: - node_type = "other" - dir_info_f = "NA" - mtime = os.path.getmtime(abs_path) - else: - # Normal node - if os.path.isfile(abs_path): - node_type = "file" - dir_info_f = "NA" - elif os.path.isdir(abs_path): - node_type = "directory" - dir_info_f = "branch" - else: - node_type = "other" - dir_info_f = "NA" - mtime = os.path.getmtime(abs_path) - except OSError: - # Anything that blows up during classification/stat becomes - # constrained; we do not attempt checksum for these. - node_type = "constrained" - dir_info_f = "NA" - mtime = None - - info: Dict[str, Any] = { - "mtime": mtime - ,"node_type": node_type - ,"dir_info": dir_info_f - } - - if node_type == "file" and checksum_fn is not None and isinstance(mtime, (int, float)): - info["checksum"] = checksum_fn(abs_path) - - tree_dict[rel_path] = info - - if meta.debug_has("tree_dict_print"): - tree_dict_print(tree_dict) - - return tree_dict - -def tree_dict_print( - tree_dict: Dict[str, Dict[str, Any]] -) -> None: - """ - Pretty-print a tree_dict produced by tree_dict_make() in fixed-width columns: - - [type] [dir] [mtime] [checksum?] [relative path] - - Only the values are printed in each column (no 'field=' prefixes). 
- mtime is formatted via the Z module for human readability. - """ - entries: List[tuple[str, str, str, str, str]] = [] - has_checksum = False - - for rel_path in sorted(tree_dict.keys()): - info = tree_dict[rel_path] - display_path = rel_path if rel_path != "" else "." - - type_val = str(info.get("node_type", "")) - dir_val = str(info.get("dir_info", "")) - - raw_mtime = info.get("mtime") - if isinstance(raw_mtime, (int, float)): - mtime_val = meta.z_format_mtime(raw_mtime) - else: - mtime_val = str(raw_mtime) - - if "checksum" in info: - checksum_val = str(info["checksum"]) - has_checksum = True - else: - checksum_val = "" - - entries.append(( - type_val - ,dir_val - ,mtime_val - ,checksum_val - ,display_path - )) - - # Compute column widths - type_w = 0 - dir_w = 0 - mtime_w = 0 - checksum_w = 0 - - for type_val, dir_val, mtime_val, checksum_val, _ in entries: - if len(type_val) > type_w: - type_w = len(type_val) - if len(dir_val) > dir_w: - dir_w = len(dir_val) - if len(mtime_val) > mtime_w: - mtime_w = len(mtime_val) - if has_checksum and len(checksum_val) > checksum_w: - checksum_w = len(checksum_val) - - print("Tree dictionary contents:") - for type_val, dir_val, mtime_val, checksum_val, display_path in entries: - line = " " - line += type_val.ljust(type_w) - line += " " - line += dir_val.ljust(dir_w) - line += " " - line += mtime_val.ljust(mtime_w) - - if has_checksum: - line += " " - line += checksum_val.ljust(checksum_w) - - line += " " - line += display_path - - print(line) - - -def tree_dict_A_minus_B( - A: Dict[str, Dict[str, Any]] - ,B: Dict[str, Dict[str, Any]] -) -> Dict[str, Dict[str, Any]]: - """ - Compute the set difference of two tree_dicts at the key level: - - Result = A \\ B - - That is, return a new tree_dict containing only those entries whose - keys are present in A but NOT present in B. 
- """ - result: Dict[str, Dict[str, Any]] = {} - - B_keys = set(B.keys()) - - for key, info in A.items(): - if key not in B_keys: - result[key] = info - - if meta.debug_has("tree_dict_A_minus_B"): - tree_dict_print(result) - - return result - - -def tree_dict_in_between_and_below( - A: Dict[str, Dict[str, Any]] - ,B: Dict[str, Dict[str, Any]] -) -> tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]: - """ - Partition nodes of B into two topology-based sets relative to A: - - 1. in_between: - Nodes in B that lie under at least one directory node in A, - but do NOT lie under any leaf directory of A. - - 2. below: - Nodes in B that lie under at least one leaf directory of A. - - Definitions (relative to A's directory topology): - - - A directory node in A is any key whose info['node_type'] == 'directory'. - - - A leaf directory in A is a directory that has no *other* directory - in A as a proper descendant. The project root ('') is therefore - never a leaf (it always has descendant directories if the tree is - non-trivial). - - - “Lies under”: - * For a path p in B, we look at the chain of directory ancestors - (including the root "") and, if p itself is a directory, p - itself. Any of those that appear as directory keys in A are - considered directory ancestors in A. - - * If any of those ancestors is a leaf in A, p goes to 'below'. - Otherwise, if there is at least one directory ancestor in A, - p goes to 'in_between'. - - - Nodes in B that do not lie under any directory in A are ignored. - - Returns: - (in_between_dict, below_dict), both keyed like B and containing - copies of the info dicts from B. - """ - # 1. Collect all directory keys from A - A_dir_keys: Set[str] = set( - key for key, info in A.items() - if info.get("node_type") == "directory" - ) - - # 2. 
Compute leaf directories in A - leaf_dirs: Set[str] = set() - - for d in A_dir_keys: - if d == "": - continue - - has_child_dir = False - prefix = d + os.sep - - for other in A_dir_keys: - if other == d: - continue - if other.startswith(prefix): - has_child_dir = True - break - - if not has_child_dir: - leaf_dirs.add(d) - - in_between: Dict[str, Dict[str, Any]] = {} - below: Dict[str, Dict[str, Any]] = {} - - for key, info in B.items(): - # Skip B's root - if key in ("", "."): - continue - - parts = key.split(os.sep) - - # Build directory ancestor chain - node_is_dir = (info.get("node_type") == "directory") - - ancestors: List[str] = [""] - prefix = None - - if node_is_dir: - upto = parts - else: - upto = parts[:-1] - - for part in upto: - if prefix is None: - prefix = part - else: - prefix = os.path.join(prefix, part) - ancestors.append(prefix) - - # Filter ancestors to those that exist as directories in A - ancestors_in_A = [d for d in ancestors if d in A_dir_keys] - - if not ancestors_in_A: - # This B node is not under any directory from A; ignore it. - continue - - # Any leaf ancestor in A? - has_leaf_ancestor = any(d in leaf_dirs for d in ancestors_in_A) - - if has_leaf_ancestor: - below[key] = info - else: - in_between[key] = info - - if meta.debug_has("tree_dict_in_between_and_below"): - merged: Dict[str, Dict[str, Any]] = {} - merged.update(in_between) - merged.update(below) - tree_dict_print(merged) - - return in_between, below - - -def tree_dict_newer( - A: Dict[str, Dict[str, Any]] - ,B: Dict[str, Dict[str, Any]] -) -> Dict[str, Dict[str, Any]]: - """ - Return a dictionary of nodes from B that are newer than their - corresponding nodes in A. - - For each key k: - - - If k exists in both A and B, and - - B[k]['mtime'] > A[k]['mtime'], - - then k is included in the result with value B[k]. - - Keys that are only in B (not in A) are ignored here. 
- """ - result: Dict[str, Dict[str, Any]] = {} - - for key, info_B in B.items(): - info_A = A.get(key) - if info_A is None: - continue - - mtime_A = info_A.get("mtime") - mtime_B = info_B.get("mtime") - - if mtime_A is None or mtime_B is None: - continue - - if mtime_B > mtime_A: - result[key] = info_B - - if meta.debug_has("tree_dict_newer"): - tree_dict_print(result) - - return result - - -def tree_dict_older( - A: Dict[str, Dict[str, Dict[str, Any]]] - ,B: Dict[str, Dict[str, Dict[str, Any]]] -) -> Dict[str, Dict[str, Any]]: - """ - Return a dictionary of nodes from B that are older than their - corresponding nodes in A. - - For each key k: - - - If k exists in both A and B, and - - B[k]['mtime'] < A[k]['mtime'], - - then k is included in the result with value B[k]. - - Keys that are only in B (not in A) are ignored here. - """ - result: Dict[str, Dict[str, Any]] = {} - - for key, info_B in B.items(): - info_A = A.get(key) - if info_A is None: - continue - - mtime_A = info_A.get("mtime") - mtime_B = info_B.get("mtime") - - if mtime_A is None or mtime_B is None: - continue - - if mtime_B < mtime_A: - result[key] = info_B - - if meta.debug_has("tree_dict_older"): - tree_dict_print(result) - - return result - - -def in_between_newer( - A: TreeDict - ,B: TreeDict -) -> TreeDict: - """ - in_between_newer(A, B) -> TreeDict - - Return the subset of B's nodes that: - - 1. Are in the 'in_between' region with respect to A's topology, and - 2. Are "newer" than A at the same path, or absent from A. - - Only file nodes in B are considered. 
- """ - in_between, _below = tree_dict_in_between_and_below(A, B) - - result: TreeDict = {} - - for path, b_info in in_between.items(): - b_type = b_info.get("node_type") - - # Only consider files for "newer" semantics - if b_type != "file": - continue - - b_mtime = b_info.get("mtime") - a_info = A.get(path) - - # Case 1: path not in A - if a_info is None: - result[path] = b_info - continue - - a_type = a_info.get("node_type") - - # Case 2: A has non-file, B has file - if a_type != "file": - result[path] = b_info - continue - - # Case 3: both files, compare mtime - a_mtime = a_info.get("mtime") - if ( - isinstance(a_mtime, (int, float)) - and isinstance(b_mtime, (int, float)) - and b_mtime > a_mtime - ): - result[path] = b_info - - if meta.debug_has("in_between_newer"): - tree_dict_print(result) - - return result diff --git a/tool/skeleton/temp.txt b/tool/skeleton/temp.txt deleted file mode 100644 index fb4aebf..0000000 --- a/tool/skeleton/temp.txt +++ /dev/null @@ -1,36 +0,0 @@ -Perhaps we are now ready to summit. - -We will add the module 'command.py' - -functions: help, - -The top level function, `dispatch`, will take the list. It will then dispatch a distinct function for each command. - -Each command specific function will use the functions we have written to accomplish the work of the command. See notes below. - -``` -Usage: - check * [] - -Where: - :: | | - - :: version | help | usage - :: environment - :: structure | import | export | suspicious | addendum | all -``` -The meaning of these commands: - -structure: differences in directory structure, directories in A - B - -import: a list of shell copy commands that would copy newer nodes in B into A, or nodes that are not in B into A. Editing this list, then running the shell commands will be helpful for updating the Harmony skeleton (A). - -export: a list of shell copy commands that would copy newer nodes in A into B, or nodes in A that are not in B. Comes from the older list. 
def CLI(argv: Sequence[str] | None = None) -> int:
  """
  CLI test driver for skeleton.tree_dict_A_minus_B(A, B).

  Args:
    argv: Argument list without the program name. Defaults to
      sys.argv[1:]. Exactly two arguments are expected:
      <A_root> <B_root>.

  Returns:
    0  both trees were built and the difference was computed
    1  wrong argument count, or -h / --help given as first argument
       (the usage line is printed)
    2  <A_root> is not a directory
    3  <B_root> is not a directory
  """
  if argv is None:
    argv = sys.argv[1:]

  # Fall back to the script name when argv[0] is missing or empty, so the
  # messages below never start with a blank program name.
  prog = (os.path.basename(sys.argv[0]) if sys.argv else "") or "A_minus_B"

  if len(argv) != 2 or argv[0] in ("-h", "--help"):
    print(f"Usage: {prog} <A_root> <B_root>")
    return 1

  A_root, B_root = argv

  # Distinct exit codes tell the caller which root was rejected.
  for root, status in ((A_root, 2), (B_root, 3)):
    if not os.path.isdir(root):
      print(f"{prog}: {root}: not a directory")
      return status

  A = skeleton.tree_dict_make(A_root, None)
  B = skeleton.tree_dict_make(B_root, None)

  # The result is not used here; the debug tag is set first, presumably so
  # that tree_dict_A_minus_B reports its work itself (confirm in skeleton).
  meta.debug_set("tree_dict_A_minus_B")
  skeleton.tree_dict_A_minus_B(A, B)

  return 0
# Uncomment to print the classified command lists while debugging:
# meta.debug_set("print_command_lists")

# Command tag sets (classification universe)
HELP_COMMANDS: set[str] = {
  "version"
  ,"help"
  ,"usage"
}

NO_OTHER_COMMANDS: set[str] = {
  "environment"
}

HAS_OTHER_COMMANDS: set[str] = {
  "structure"
  ,"import"
  ,"export"
  ,"suspicious"
  ,"addendum"
  ,"all"
}


def command_type(arg: str) -> str:
  """
  Classify a single command token.

  Returns:
    "Help"          if arg is a help command
    "NoOther"       if arg is a no_other command
    "HasOther"      if arg is a has_other command
    "UnClassified"  otherwise
  """
  if arg in HELP_COMMANDS:
    return "Help"

  if arg in NO_OTHER_COMMANDS:
    return "NoOther"

  if arg in HAS_OTHER_COMMANDS:
    return "HasOther"

  return "UnClassified"


def print_command_lists(
  help_list: list[str]
  ,no_other_list: list[str]
  ,has_other_list: list[str]
  ,unclassified_list: list[str]
) -> None:
  """
  Print the four classified command lists derived from argv.
  """
  print("help_list:", help_list)
  print("no_other_list:", no_other_list)
  print("has_other_list:", has_other_list)
  print("unclassified_list:", unclassified_list)


def CLI(argv: Sequence[str] | None = None) -> int:
  """
  CLI entrypoint.

  Responsibilities:
    1. Accept argv (or sys.argv[1:] by default).
    2. Classify arguments using command_type(), with the last argument
       treated specially to avoid aliasing between a command name and a
       path.
    3. Invoke behaviors implied by the commands.
    4. Return integer status code.

  Argument interpretation:

    Let argv = [a0, a1, ..., aN-1].

    - If N == 0:
        print usage and return 1.

    - If N >= 1:
        * Classify a0..aN-2.
            - If any are UnClassified -> error.

        * If any <has_other> appear in a0..aN-2:
            - aN-1 is treated as the <other> path (B_root), not classified.

        * If no <has_other> appear in a0..aN-2:
            - Classify aN-1:
                - If UnClassified -> error (unknown command).
                - If HasOther     -> error (other path not specified).
                - Else            -> added to Help / NoOther lists.

  Returns:
    0  success
    1  no arguments, or a help command was processed
    2  Harmony.where() reported 'different'
    3  Harmony.where() reported 'not-found'
    4  the <other> path is not a directory
    5  unrecognized command
    6  a <has_other> command was given without an <other> path
    7  internal error (B_root unset)
    Otherwise the non-zero status of meta.printenv() or command.dispatch().
  """
  if argv is None:
    argv = sys.argv[1:]

  # No arguments: print usage and exit with status 1.
  if len(argv) == 0:
    doc.print_usage()
    return 1

  # Split into head (all but last) and last argument
  head = argv[:-1]
  last = argv[-1]

  help_list: list[str] = []
  no_other_list: list[str] = []
  has_other_list: list[str] = []
  unclassified_list: list[str] = []

  # Maps each command_type() result onto the list that collects it.
  list_for_type: dict[str, list[str]] = {
    "Help": help_list
    ,"NoOther": no_other_list
    ,"HasOther": has_other_list
    ,"UnClassified": unclassified_list
  }

  # 1. Classify head tokens
  for arg in head:
    list_for_type[command_type(arg)].append(arg)

  # Any unclassified in the head is an error
  if unclassified_list:
    print(f"Unrecognized command: {unclassified_list[0]}")
    return 5

  B_root: str | None = None

  if has_other_list:
    # 2A. Any <has_other> in head -> last arg is always the <other> path.
    B_root = os.path.abspath(last)
  else:
    # 2B. No <has_other> in head -> classify last.
    ct = command_type(last)

    if ct == "UnClassified":
      print(f"Unrecognized command: {last}")
      return 5

    if ct == "HasOther":
      print("Other path not specified for has_other command(s).")
      return 6

    # Only Help / NoOther remain at this point.
    list_for_type[ct].append(last)

  if meta.debug_has("print_command_lists"):
    print_command_lists(
      help_list
      ,no_other_list
      ,has_other_list
      ,unclassified_list
    )

  # Help handling: help commands win, every other command is ignored.
  if help_list:
    if "version" in help_list:
      meta.version_print()
    if "usage" in help_list:
      doc.print_usage()
    if "help" in help_list:
      doc.print_help()
    return 1

  ret_val = 0

  # No-other commands (environment, etc.)
  if "environment" in no_other_list:
    env_status = meta.printenv()
    if env_status != 0:
      ret_val = env_status

  # If we still have no has_other commands, we are done.
  # (Example: just "environment".)
  if not has_other_list:
    return ret_val

  # has_other_list is only filled from the head, so B_root was set in 2A.
  # The check is kept as a guard against future edits.
  if B_root is None:
    print("Internal error: B_root not set despite has_other commands.")
    return 7

  if not os.path.isdir(B_root):
    print(f"Other project path is not a directory: {B_root}")
    return 4

  # Determine Harmony root (A_root)
  status, A_root = Harmony.where()

  if status == "not-found":
    print("Harmony project not found; normally this command is run from within Harmony.")
    return 3

  if status == "different":
    print("Seems we are not running in the Harmony project, will exit.")
    return 2

  # Build tree_dicts for A (Harmony) and B (other project)
  A_tree = skeleton.tree_dict_make(A_root, None)
  B_tree = skeleton.tree_dict_make(B_root, None)

  # Dispatch the <has_other> commands
  cmd_status = command.dispatch(
    has_other_list
    ,A_tree
    ,B_tree
    ,A_root
    ,B_root
  )

  if cmd_status != 0:
    ret_val = cmd_status

  return ret_val
coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- + +""" +GitIgnore.py - minimal .gitignore-based helper for Harmony projects + +Behavior: + + 1. During initialization, traverse the project tree rooted at + . + + 2. Whenever a directory contains a '.gitignore' file, record: + - its relative directory path from the project root + - a list of regular expressions compiled from the patterns + in that '.gitignore' file + + These are stored in: + + self.rules: Dict[str, List[Pattern]] + + where the key is the directory RELATIVE to the project root: + "" -> project root (top-level .gitignore) + "src" -> src/.gitignore + "src/module" -> src/module/.gitignore + + 3. check() -> token: + + - is a path relative to the project root. + + - We compute all prefix directories of , including the + root (""), for example: + + path = "a/b/c.py" + prefixes = ["", "a", "a/b"] + + - For each prefix, if there are regexes stored for that directory, + we collect them. + + - We then test ALL collected regexes against the basename of + (the last component only). + + - If ANY regex matches, return 'Ignore'. + Otherwise return 'Accept'. + +Notes: + + * We implement a simplified subset of .gitignore semantics suitable + for your current patterns and add a small base ignore set for + always-ignored names such as '.git'. +""" + +from __future__ import annotations + +import fnmatch +import os +import re +from typing import Dict, List +import Harmony + + +class GitIgnore: + """ + GitIgnore(project_path) + + Attributes: + project_path: + Absolute path to the project root. + + rules: + Mapping from relative directory path -> list of compiled regex + patterns derived from that directory's '.gitignore' file. + + Example: + rules[""] -> patterns from /.gitignore + rules["developer"] -> patterns from developer/.gitignore + + base_patterns: + List of compiled regex patterns applied to the basename of every + checked path, independent of any .gitignore file. 
Currently used + to always ignore '.git' directories. + """ + + def __init__( + self + ,project_path: str + ) -> None: + """ + Initialize a GitIgnore instance with a path to a project and + scan for '.gitignore' files. + """ + self.project_path: str = os.path.abspath(project_path) + self.rules: Dict[str, List[re.Pattern]] = {} + + # Base patterns: always applied, regardless of .gitignore contents. + # These are matched against basenames only. + self.base_patterns: List[re.Pattern] = [ + re.compile(r"^\.git$") # ignore any basename == ".git" + ] + + self._scan_project() + + def _scan_project(self) -> None: + """ + Traverse the project tree and populate self.rules with entries of + the form: + + -> [Pattern, Pattern, ...] + + where is the directory containing '.gitignore', relative + to the project root ("" for root). + """ + root = self.project_path + + for dirpath, dirnames, filenames in os.walk(root, topdown=True): + if ".gitignore" not in filenames: + continue + + rel_dir = os.path.relpath(dirpath, root) + if rel_dir == ".": + rel_dir = "" + + gitignore_path = os.path.join(dirpath, ".gitignore") + patterns = self._parse_gitignore_file(gitignore_path) + + if patterns: + if rel_dir not in self.rules: + self.rules[rel_dir] = [] + self.rules[rel_dir].extend(patterns) + + def _parse_gitignore_file( + self + ,gitignore_path: str + ) -> List[re.Pattern]: + """ + Parse a single '.gitignore' file into a list of compiled regex patterns. + + Simplified rules: + - Blank lines and lines starting with '#' are ignored. + - Lines containing '/' in the MIDDLE are currently ignored + (future extension). + - Lines ending with '/' are treated as directory name patterns: + '__pycache__/' -> pattern on basename '__pycache__' + - All patterns are treated as name globs and compiled via + fnmatch.translate(), to be matched against basenames only. 
+ """ + patterns: List[re.Pattern] = [] + + try: + with open(gitignore_path, "r", encoding="utf-8") as f: + for raw_line in f: + line = raw_line.strip() + + # Skip comments and blank lines + if not line or line.startswith("#"): + continue + + # Remove trailing '/' for directory patterns (e.g. '__pycache__/') + if line.endswith("/"): + line = line[:-1].strip() + if not line: + continue + + # If there is still a '/' in the line, we do not support this + # pattern in this minimal implementation. + if "/" in line: + continue + + # Compile as a name glob -> regex + regex_text = fnmatch.translate(line) + patterns.append(re.compile(regex_text)) + + except OSError: + # If the .gitignore cannot be read, just skip it. + return patterns + + return patterns + + def check( + self + ,path: str + ) -> str: + """ + Check a path against the collected .gitignore patterns. + + path: + A path relative to the project root. + + Returns: + 'Ignore' if any applicable pattern matches the basename of the path, + otherwise 'Accept'. + """ + # Normalize the incoming path + norm = os.path.normpath(path) + + # If the path is '.' or empty, we accept it + if norm in ("", "."): + return "Accept" + + basename = os.path.basename(norm) + + # First, apply base patterns (always applied). + for pat in self.base_patterns: + if pat.match(basename): + return "Ignore" + + # Build the list of directories that may contribute .gitignore rules. + # + # For path "a/b/c": + # prefixes: ["", "a", "a/b"] + parts = norm.split(os.sep) + + prefixes: List[str] = [""] + prefix = None + for part in parts[:-1]: + if prefix is None: + prefix = part + else: + prefix = os.path.join(prefix, part) + prefixes.append(prefix) + + # Collect all patterns from the applicable .gitignore directories + for rel_dir in prefixes: + dir_patterns = self.rules.get(rel_dir) + if not dir_patterns: + continue + + for pat in dir_patterns: + if pat.match(basename): + return "Ignore" + + return "Accept" + + +def test_GitIgnore() -> int: + """ + 1. 
Locate the Harmony project root using Harmony.where(). + 2. Create a GitIgnore instance rooted at that path. + 3. Print: + - directories that have .gitignore rules + - directories (relative) that would be ignored by check() + """ + status, Harmony_root = Harmony.where() + + if status == "not-found": + print("Harmony project not found; cannot test GitIgnore.") + return 1 + + if status == "different": + print("Warning: Harmony not found, using nearest .git directory for GitIgnore test.") + + gi = GitIgnore(Harmony_root) + + print(".gitignore rule directories (relative to Harmony root):") + for rel_dir in sorted(gi.rules.keys()): + print(f" {rel_dir if rel_dir else '.'}") + + print("\nDirectories that would be ignored (relative to Harmony root):") + for dirpath, dirnames, filenames in os.walk(Harmony_root, topdown=True): + rel_dir = os.path.relpath(dirpath, Harmony_root) + if rel_dir == ".": + rel_dir = "" + + if gi.check(rel_dir) == "Ignore": + print(f" {rel_dir if rel_dir else '.'}") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(test_GitIgnore()) diff --git a/tool/skeleton_compare_source/Harmony.py b/tool/skeleton_compare_source/Harmony.py new file mode 100644 index 0000000..9385507 --- /dev/null +++ b/tool/skeleton_compare_source/Harmony.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +# -*- mode: python; coding: utf-8; python-indent-offset: 2; indent-tabs-mode: nil -*- + +""" +locate the project root +""" + +from __future__ import annotations + +import meta +import os +import sys +from typing import Any, Callable, Dict + +# where +# +# Context / assumptions: +# 1. This module lives somewhere under the Harmony tree, for example: +# /.../Harmony/tool/skeleton/skeleton.py +# 2. CLI.py is run from somewhere inside the same tree (or a clone). +# +# Search behavior: +# 1. Start from the directory containing this file. +# 2. Walk upward towards the filesystem root, with limits: +# a) Do not move up more than 5 levels. 
def where() -> tuple[str, str]:
  """
  Locate the Harmony root (or best guess).

  Starting at the directory containing this file, walk upward at most
  five levels. The walk stops at the first directory that is named
  'Harmony' or that contains a '.git' subdirectory.

  Returns:
    (status, path), where path is the last directory inspected and
    status is one of:
      'found'      path has basename 'Harmony'
      'different'  path contains '.git' but is not named 'Harmony'
      'not-found'  the level limit or the filesystem root was reached

  Side effects:
    - Writes a warning to stderr whenever the returned path's basename
      is not 'Harmony'.
    - Prints the outcome when the meta debug tag "print_Harmony_root"
      is set.
  """
  d = os.path.dirname(os.path.abspath(__file__))

  status = "not-found"
  max_up = 5
  steps = 0

  while True:
    # Case 1: exact 'Harmony' directory name
    if os.path.basename(d) == "Harmony":
      status = "found"
      break

    # Case 2: a '.git' subdirectory marks some other project's root.
    # (The basename cannot be 'Harmony' here; Case 1 already handled it.)
    if os.path.isdir(os.path.join(d, ".git")):
      status = "different"
      break

    parent = os.path.dirname(d)

    # Stop if we hit filesystem root
    if parent == d:
      break

    # Stop at the depth limit without moving, so that d stays the last
    # directory that was actually inspected.
    steps += 1
    if steps > max_up:
      break

    d = parent

  harmony_root = d
  root_base = os.path.basename(harmony_root)

  # Warning to stderr if we are not literally in a 'Harmony' directory
  if root_base != "Harmony":
    sys.stderr.write(
      f"WARNING: Harmony root basename is '{root_base}', expected 'Harmony'.\n"
    )

  if meta.debug_has("print_Harmony_root"):
    if status == "found":
      print(f"The Harmony project root found at: {harmony_root}")
    elif status == "different":
      print(f"Harmony not found, but found: {harmony_root}")
    else:
      print("Harmony not found.")

  return status, harmony_root


def test_where() -> int:
  """
  Simple test that prints the Harmony root using the debug flag.

  Returns:
    0 when where() reports 'found' or 'different', 1 for 'not-found'.
  """
  meta.debug_set("print_Harmony_root")
  status, _root = where()
  return 0 if status != "not-found" else 1
def CLI(argv=None) -> int:
  """
  Report where Harmony.where() places the Harmony project root.

  argv is accepted for symmetry with the other CLI entry points but is
  ignored; the command takes no arguments.

  Returns:
    0  status 'found'      (root path printed)
    1  status 'different'  (nearest .git directory printed)
    2  any other status    (nothing located)
  """
  outcome, located = Harmony.where()

  # Handle the failure case first, then the two cases that carry a path.
  if outcome not in ("found", "different"):
    print("Harmony project root not found.")
    return 2

  if outcome == "different":
    print(f"Harmony not found, but nearest .git directory is: {located}")
    return 1

  print(f"Harmony project root found at: {located}")
  return 0
TreeDict = Dict[str, Dict[str, Any]]


def shell_quote(
  s: str
) -> str:
  """
  Minimal single-quote shell quoting.

  The string is wrapped in single quotes; each embedded single quote is
  emitted as '"'"' (close quote, double-quoted quote, reopen quote).
  """
  return "'" + s.replace("'", "'\"'\"'") + "'"


def _print_header(
  title: str
) -> None:
  """Print a blank line followed by a '== title ==' section header."""
  print()
  print(f"== {title} ==")


def _print_lines_or_none(
  lines: List[str]
  ,prefix: str = ""
) -> None:
  """Print each line with prefix, or '# (none)' when the list is empty."""
  if not lines:
    print("# (none)")
    return
  for line in lines:
    print(f"{prefix}{line}")


# ----------------------------------------------------------------------
# structure: directories in A that are missing / non-directories in B
# ----------------------------------------------------------------------
def cmd_structure(
  A: TreeDict
  ,B: TreeDict
) -> int:
  """
  structure: differences in directory structure, directories in A - B.

  We include any path where:
    - A[path].node_type == 'directory', and
    - either path not in B, or B[path].node_type != 'directory'.

  Returns:
    Always 0; the result is printed.
  """
  structural: TreeDict = {
    path: info_A
    for path, info_A in A.items()
    if info_A.get("node_type") == "directory"
    and (B.get(path) or {}).get("node_type") != "directory"
  }

  if not structural:
    _print_header("structure")
    print("No structural directory differences (A - B).")
    return 0

  _print_header("structure: directories in A not in B")
  skeleton.tree_dict_print(structural)
  return 0


# ----------------------------------------------------------------------
# shared builder for import (B -> A) and export (A -> B)
# ----------------------------------------------------------------------
def _cp_command(
  src_root: str
  ,rel_path: str
  ,dst_root: str
) -> str:
  """
  Build the shell command that copies src_root/rel_path to
  dst_root/rel_path.

  'cp --parents' appends the source name exactly as given to the target
  directory, so an absolute source path would be recreated in full under
  the destination. The command therefore changes into src_root inside a
  subshell and passes the relative path. Because of the 'cd', the
  destination is made absolute.
  """
  dst = os.path.abspath(dst_root)

  if not rel_path:
    # Degenerate case: the tree root itself is the node to copy.
    return f"cp -a {shell_quote(src_root)} {shell_quote(dst)}/"

  return (
    f"(cd {shell_quote(src_root)}"
    f" && cp --parents -a {shell_quote(rel_path)} {shell_quote(dst)}/)"
  )


def _build_sync_commands(
  src_tree: TreeDict
  ,dst_tree: TreeDict
  ,src_root: str
  ,dst_root: str
  ,src_label: str
) -> Tuple[List[str], List[str], List[str]]:
  """
  Compute the commands that update the destination tree from the source.

  src_label is "A" or "B" and says which tree the source is; it only
  affects the "A=..., B=..." wording of the other_list notes.

  Returns:
    (mkdir_cmds, cp_cmds, other_list)
  """
  mkdir_cmds: List[str] = []
  cp_cmds: List[str] = []
  other_list: List[str] = []

  for rel_path, src_info in src_tree.items():
    src_type = src_info.get("node_type")
    rel_display = rel_path if rel_path else "."

    dst_info = dst_tree.get(rel_path)
    dst_type = dst_info.get("node_type") if dst_info is not None else "MISSING"

    # Unsupported source node, or the two sides disagree on the node
    # type: never resolved automatically, only reported.
    if (
      src_type not in ("file", "directory")
      or (dst_info is not None and dst_type != src_type)
    ):
      if src_label == "A":
        note = f"A={src_type}, B={dst_type}"
      else:
        note = f"A={dst_type}, B={src_type}"
      other_list.append(f"{rel_display}: {note}")
      continue

    if src_type == "directory":
      # Missing on the destination side: safe to mkdir -p
      if dst_info is None:
        target_dir = os.path.join(dst_root, rel_path) if rel_path else dst_root
        mkdir_cmds.append(f"mkdir -p {shell_quote(target_dir)}")
      continue

    # Source-only file: always copy
    if dst_info is None:
      cp_cmds.append(_cp_command(src_root, rel_path, dst_root))
      continue

    # Both files: copy only when the source is strictly newer. A missing
    # or non-numeric mtime on either side means "cannot tell" -> no copy.
    src_mtime = src_info.get("mtime")
    dst_mtime = dst_info.get("mtime")

    if (
      isinstance(src_mtime, (int, float))
      and isinstance(dst_mtime, (int, float))
      and src_mtime > dst_mtime
    ):
      cp_cmds.append(_cp_command(src_root, rel_path, dst_root))

  return mkdir_cmds, cp_cmds, other_list


def _print_sync_plan(
  src_label: str
  ,dst_label: str
  ,mkdir_cmds: List[str]
  ,cp_cmds: List[str]
  ,other_list: List[str]
) -> None:
  """Print the mkdir / cp / not-handled sections of a sync report."""
  print(f"# Directories to create in {dst_label} (mkdir -p):")
  _print_lines_or_none(mkdir_cmds)
  print("#")

  print(f"# Files to copy from {src_label} -> {dst_label} (cp --parents -a):")
  _print_lines_or_none(cp_cmds)
  print("#")

  print("# Nodes NOT handled automatically (type mismatches / non-file/dir):")
  _print_lines_or_none(other_list, prefix="# ")


# ----------------------------------------------------------------------
# import: B -> A (mkdir, cp, and "other" list), using in_between_newer
# ----------------------------------------------------------------------
def build_import_commands(
  A_tree: TreeDict
  ,B_tree: TreeDict
  ,A_root: str
  ,B_root: str
) -> Tuple[List[str], List[str], List[str]]:
  """
  Compute shell commands to update A from B.

  Returns:
    (mkdir_cmds, cp_cmds, other_list)

  Semantics:

    mkdir_cmds:
      - Directories that are directories in B, but are missing in A.
      - Type mismatches (e.g. B=directory, A=file) are NOT resolved
        automatically; those go into other_list.

    cp_cmds:
      - Files in B where:
          * the path does not exist in A, OR
          * A also has a file there and the B copy is newer (mtime).
      - Each command runs cp --parents from inside B_root so that only
        the relative path is recreated below A_root.

    other_list:
      - Human-readable notes for:
          * type mismatches between A and B, and
          * nodes in B that are neither 'file' nor 'directory'.
  """
  return _build_sync_commands(B_tree, A_tree, B_root, A_root, "B")


def cmd_import(
  A_tree: TreeDict
  ,B_tree: TreeDict
  ,A_root: str
  ,B_root: str
) -> int:
  """
  import: update the skeleton (A) from the project (B),
  using only the nodes returned by skeleton.in_between_newer(A, B).

  Returns:
    Always 0; the commands are printed, not executed.
  """
  inb_newer = skeleton.in_between_newer(A_tree, B_tree)

  mkdir_cmds, cp_cmds, other_list = build_import_commands(
    A_tree
    ,inb_newer
    ,A_root
    ,B_root
  )

  print("== import: copy from B -> A (in-between newer only) ==")
  print(f"# A root: {A_root}")
  print(f"# B root: {B_root}")
  print("# Only considering in-between files that are new or absent in A.")
  print("#")

  _print_sync_plan("B", "A", mkdir_cmds, cp_cmds, other_list)
  return 0


# ----------------------------------------------------------------------
# export: A -> B (mkdir, cp, and "other" list)
# ----------------------------------------------------------------------
def build_export_commands(
  A_tree: TreeDict
  ,B_tree: TreeDict
  ,A_root: str
  ,B_root: str
) -> Tuple[List[str], List[str], List[str]]:
  """
  Compute shell commands to update B from A.

  Returns:
    (mkdir_cmds, cp_cmds, other_list)

  Semantics:

    mkdir_cmds:
      - Directories that are directories in A, but are missing in B.
      - Type mismatches go into other_list.

    cp_cmds:
      - Files in A where:
          * the path does not exist in B, OR
          * B also has a file there and the A copy is newer (mtime).
      - Each command runs cp --parents from inside A_root so that only
        the relative path is recreated below B_root.

    other_list:
      - Human-readable notes for:
          * type mismatches between A and B, and
          * nodes in A that are neither 'file' nor 'directory'.
  """
  return _build_sync_commands(A_tree, B_tree, A_root, B_root, "A")


def cmd_export(
  A_tree: TreeDict
  ,B_tree: TreeDict
  ,A_root: str
  ,B_root: str
) -> int:
  """
  export: show directory creation and copy commands A -> B.

  Returns:
    Always 0; the commands are printed, not executed.
  """
  mkdir_cmds, cp_cmds, other_list = build_export_commands(
    A_tree
    ,B_tree
    ,A_root
    ,B_root
  )

  print("== export: copy from A -> B ==")
  print(f"# A root: {A_root}")
  print(f"# B root: {B_root}")
  print("#")

  _print_sync_plan("A", "B", mkdir_cmds, cp_cmds, other_list)
  return 0


# ----------------------------------------------------------------------
# suspicious / addendum via in_between_and_below
# ----------------------------------------------------------------------
def cmd_suspicious(
  A: TreeDict
  ,B: TreeDict
) -> int:
  """
  suspicious: nodes in B that fall 'in between' the Harmony skeleton,
  not under leaf directories (first element of
  skeleton.tree_dict_in_between_and_below(A, B)).

  Returns:
    Always 0; the result is printed.
  """
  in_between, _below = skeleton.tree_dict_in_between_and_below(A, B)

  _print_header("suspicious: nodes in-between Harmony leaves")

  if not in_between:
    print("No suspicious nodes found in B (relative to A).")
    return 0

  skeleton.tree_dict_print(in_between)
  return 0


def cmd_addendum(
  A: TreeDict
  ,B: TreeDict
) -> int:
  """
  addendum: nodes in B that fall 'below' Harmony leaf directories
  (second element of skeleton.tree_dict_in_between_and_below(A, B)).

  Returns:
    Always 0; the result is printed.
  """
  _in_between, below = skeleton.tree_dict_in_between_and_below(A, B)

  _print_header("addendum: nodes added under Harmony leaves")

  if not below:
    print("No addendum nodes found in B (relative to A).")
    return 0

  skeleton.tree_dict_print(below)
  return 0


# ----------------------------------------------------------------------
# Top-level dispatcher
# ----------------------------------------------------------------------
def dispatch(
  has_other_list: List[str]
  ,A: TreeDict
  ,B: TreeDict
  ,A_root: str
  ,B_root: str
) -> int:
  """
  Dispatch commands.

  has_other_list:
    List of command tokens (subset of:
      'structure', 'import', 'export', 'suspicious', 'addendum', 'all').
    Duplicates run once; unknown tokens are ignored; 'all' selects
    every command.

  A, B:
    tree_dicts for Harmony skeleton (A) and <other> project (B).

  A_root, B_root:
    Root paths corresponding to A and B (for copy commands).

  Returns:
    0 when every executed command returned 0, otherwise the status of
    the last command that returned non-zero.
  """
  # Insertion order of this table is the execution order.
  handlers = {
    "structure": lambda: cmd_structure(A, B)
    ,"import": lambda: cmd_import(A, B, A_root, B_root)
    ,"export": lambda: cmd_export(A, B, A_root, B_root)
    ,"suspicious": lambda: cmd_suspicious(A, B)
    ,"addendum": lambda: cmd_addendum(A, B)
  }

  cmds = set(has_other_list)
  if "all" in cmds:
    cmds.update(handlers)

  status = 0

  for name, handler in handlers.items():
    if name not in cmds:
      continue

    rc = handler()
    if rc != 0:
      status = rc

  return status
def prog_name() -> str:
  """
  Return the name this program was invoked under.

  The name is the basename of sys.argv[0]. Two fallbacks apply:
    - when sys.argv is empty or sys.argv[0] is the empty string, the
      literal 'check' is used;
    - when the basename itself is empty (argv[0] ends in a path
      separator), the unmodified argv[0] is returned instead.
  """
  invoked_as = "check"
  if sys.argv and sys.argv[0]:
    invoked_as = sys.argv[0]

  tail = os.path.basename(invoked_as)
  if tail:
    return tail

  # basename() yields '' for paths such as 'dir/'; keep the raw text.
  return invoked_as
+ B = project root (supplied when required). + +{prog} compares A with B. Differences may come from: + - edits to the skeleton itself, + - edits to skeleton files inside B, + - or new files/directories added to B. +Conceptually, A and B are any two non-overlapping directory trees. + +Command semantics: + structure + - Report directory-structure differences: + directories present in A that are missing in B or not + directories in B. + - Output: table of such directories. + + import + - Update A from B using only "in-between newer" files: + * files in B that lie in the 'in-between' region relative to A, and + * are newer than A or absent from A. + - Also emits: + * directories to create in A, + * files to copy (B -> A), + * nodes that cannot be handled automatically (type mismatches, + constrained nodes, non-file/dir nodes). + - Direction: B -> A + + export + - Update B from A: + * files in A newer than B at the same path, + * files present in A but missing in B. + - Also emits: + * directories to create in B, + * files to copy (A -> B), + * nodes that cannot be handled automatically. + - Direction: A -> B + + suspicious + - Report B nodes that lie "in-between" Harmony leaves: + under a directory from A, but not under any leaf directory of A. + - Indicates questionable placements or missing skeleton structure. + + addendum + - Report B nodes located "below" Harmony leaf directories: + project-specific additions placed in proper extension points. + + all + - Run: structure, import, export, suspicious, addendum (in that order). + +Notes: + - tree_dict traversal respects a simplified .gitignore model plus + always-ignored patterns (e.g. '.git'). + - Timestamps are formatted via the Z helper in UTC (ISO 8601). +""" + +def print_usage( + stream: TextIO | None = None +) -> None: + """ + Print the usage text to the given stream (default: sys.stdout), + using the actual program name as invoked. 
def print_help(
  stream: TextIO | None = None
) -> None:
  """
  Write the usage text, a blank line, and the help text to a stream.

  stream:
    Destination for the output. When None, sys.stdout (looked up at call
    time) is used.

  Both texts are rendered with the program name reported by prog_name()
  before anything is written. Each text is followed by a newline if it
  does not already end with one.
  """
  out = sys.stdout if stream is None else stream

  # Render both sections up front so a failure in either one leaves the
  # stream untouched.
  sections = (
    _usage_text(prog_name())
    ,_help_text(prog_name())
  )

  for position, text in enumerate(sections):
    if position > 0:
      # Blank separator line between usage and help.
      out.write("\n")

    out.write(text)
    if not text.endswith("\n"):
      out.write("\n")
def _find_command_candidates(command_name: str) -> List[str]:
  """
  Collect every executable regular file called `command_name` on $PATH.

  Empty $PATH entries are skipped. Results keep $PATH order and are
  passed through os.path.realpath(), so symlinks are resolved before the
  caller inspects the path text. Returns an empty list when $PATH is
  unset/empty or nothing matches.
  """
  search_dirs = [
    entry
    for entry in os.environ.get("PATH", "").split(os.pathsep)
    if entry
  ]

  found: list[str] = []

  for directory in search_dirs:
    location = os.path.join(directory, command_name)
    if not os.path.isfile(location):
      continue
    if not os.access(location, os.X_OK):
      continue
    found.append(os.path.realpath(location))

  return found


def load_command_module(command_name: str) -> ModuleType:
  """
  Find the 'incommon' script named `command_name` on $PATH and import it.

  Selection:
    - The first $PATH candidate whose resolved path contains '/incommon/'
      is loaded.
    - If there is no such candidate but one lives under '/usr/bin/',
      RuntimeError is raised explaining that the incommon version was
      expected.
    - Otherwise RuntimeError is raised saying nothing was found.

  Loading:
    The script usually has no '.py' suffix, so a SourceFileLoader is built
    explicitly instead of relying on extension-based loader lookup. The
    module is executed under the synthetic name 'rt_incommon_<command>'.
    It is returned to the caller; this function does not insert it into
    sys.modules.

  Raises:
    RuntimeError: no suitable candidate, or the import spec could not be
    created. Exceptions raised while executing the script propagate.
  """
  resolved = _find_command_candidates(command_name)

  chosen = next(
    (location for location in resolved if "/incommon/" in location)
    ,None
  )

  if chosen is None:
    if any(location.startswith("/usr/bin/") for location in resolved):
      raise RuntimeError(
        f"Found /usr/bin/{command_name}, but expected the incommon Python "
        f"{command_name} module on PATH."
      )
    raise RuntimeError(
      f"Could not find an incommon '{command_name}' module on PATH."
    )

  synthetic_name = f"rt_incommon_{command_name}"

  source_loader = SourceFileLoader(
    synthetic_name
    ,chosen
  )
  spec = importlib.util.spec_from_loader(
    synthetic_name
    ,source_loader
  )
  if spec is None:
    raise RuntimeError(f"Failed to create spec for {command_name} from {chosen}")

  loaded = importlib.util.module_from_spec(spec)
  # spec.loader is the SourceFileLoader constructed above.
  assert spec.loader is not None
  spec.loader.exec_module(loaded)

  return loaded
# Registry of active debug tags. It starts empty and is only ever mutated
# in place by the three helpers below.
_debug = set()


def debug_set(tag: str) -> None:
  """
  Switch a debug tag on. Setting a tag that is already on is a no-op.
  """
  _debug.add(tag)


def debug_clear(tag: str) -> None:
  """
  Switch a debug tag off. Clearing a tag that is not on is a no-op.
  """
  if tag in _debug:
    _debug.remove(tag)


def debug_has(tag: str) -> bool:
  """
  Report whether a debug tag is currently on.
  """
  present = tag in _debug
  return present


# Touch the default tag once so static checkers do not complain about
# unused helpers when imported purely for side-effects.
debug_has("Command")
def CLI(argv: Sequence[str] | None = None) -> int:
  """
  Command-line test driver for skeleton.tree_dict_older(A, B).

  argv:
    Argument list WITHOUT the program name; defaults to sys.argv[1:].
    Exactly two arguments are expected: the A root and the B root.

  Returns:
    0  both trees were built and tree_dict_older() was called
    1  wrong argument count, or the first argument is -h / --help
       (the usage line is printed)
    2  the A root is not a directory
    3  the B root is not a directory

  All messages are printed to stdout. The comparison result itself is
  printed by tree_dict_older() because the 'tree_dict_older' debug tag is
  set before the call.
  """
  if argv is None:
    argv = sys.argv[1:]

  # Fall back to the tool name when argv[0] is missing, empty, or has an
  # empty basename, so messages never begin with a blank program name.
  prog = (os.path.basename(sys.argv[0]) if sys.argv else "") or "older"

  if len(argv) != 2 or argv[0] in ("-h", "--help"):
    # Name both required operands; a bare "Usage: prog" is misleading
    # for a tool that refuses to run without two arguments.
    print(f"Usage: {prog} <A_root> <B_root>")
    return 1

  A_root = argv[0]
  B_root = argv[1]

  if not os.path.isdir(A_root):
    print(f"{prog}: {A_root}: not a directory")
    return 2

  if not os.path.isdir(B_root):
    print(f"{prog}: {B_root}: not a directory")
    return 3

  A = skeleton.tree_dict_make(A_root, None)
  B = skeleton.tree_dict_make(B_root, None)

  # Ask tree_dict_older() to print its result.
  meta.debug_set("tree_dict_older")

  skeleton.tree_dict_older(A, B)

  return 0
Harmony skeleton tree +""" + +from __future__ import annotations + +import os +import sys +from typing import Any, Callable, Dict, List, Set + +import meta +from GitIgnore import GitIgnore +import Harmony + +TreeDict = Dict[str, Dict[str, Any]] + +# tree_dict_make / tree_dict_print +# +# Build a dictionary describing a project tree, respecting GitIgnore. +# +# tree_dict_make(, ) -> tree_dict +# +# () -> bignum | None +# +# Keys of tree_dict: +# - Relative paths from ; the root itself is stored under "". +# +# Values are dicts with: +# 1. 'mtime' : last modification time (float seconds) +# 2. 'node_type' : 'file', 'directory', or 'other' +# 3. 'dir_info' : 'NA', 'leaf', 'branch', or 'root' +# 4. 'checksum' : present only for file nodes when checksum_fn is +# not None +# +# Traversal: +# - Any path (directory or file) for which GitIgnore.check() +# returns 'Ignore' is omitted from the tree_dict. +TreeDict = Dict[str, Dict[str, Any]] + +# tree_dict_make / tree_dict_print +# +# Build a dictionary describing a project tree, respecting GitIgnore. +# +# tree_dict_make(, ) -> tree_dict +# +# () -> bignum | None +# +# Keys of tree_dict: +# - Relative paths from ; the root itself is stored under "". +# +# Values are dicts with: +# 1. 'mtime' : last modification time (float seconds) or None +# 2. 'node_type' : 'file', 'directory', 'other', or 'constrained' +# 3. 'dir_info' : 'NA', 'leaf', 'branch', 'root' +# 4. 'checksum' : present only for file nodes when checksum_fn is +# not None +# +# Traversal: +# - Directories whose relative path GitIgnore.check() marks as +# 'Ignore' are included in tree_dict but not traversed further. +def tree_dict_make( + path: str + ,checksum_fn: Callable[[str], int] | None +) -> Dict[str, Dict[str, Any]]: + """ + Build a tree_dict for the subtree rooted at , respecting GitIgnore. + + Semantics (current): + * Any path (directory or file) for which GitIgnore.check() + returns 'Ignore' is completely omitted from the tree_dict. 
+ * The root directory ('') is always included. + * Directory dir_info: + - 'root' for the root + - 'branch' for directories that have child directories + (after GitIgnore filtering) + - 'leaf' for directories with no child directories + * Non-directory dir_info: + - 'NA' + * Symlinks are classified as file/directory/other based on what + they point to, if accessible. + * If any filesystem access needed for classification/mtime raises, + the node is recorded as node_type='constrained', dir_info='NA', + mtime=None, and we do not attempt checksum. + """ + root = os.path.abspath(path) + gi = GitIgnore(root) + + tree_dict: Dict[str, Dict[str, Any]] = {} + + for dirpath, dirnames, filenames in os.walk(root, topdown=True): + rel_dir = os.path.relpath(dirpath, root) + if rel_dir == ".": + rel_dir = "" + + # Skip ignored directories (except the root). + if rel_dir != "" and gi.check(rel_dir) == "Ignore": + dirnames[:] = [] + continue + + # Filter child directories by GitIgnore so dir_info reflects + # only directories we will actually traverse. + kept_dirnames: List[str] = [] + for dn in list(dirnames): + child_rel = dn if rel_dir == "" else os.path.join(rel_dir, dn) + if gi.check(child_rel) == "Ignore": + dirnames.remove(dn) + else: + kept_dirnames.append(dn) + + # Record the directory node itself + dir_abs = dirpath + try: + dir_mtime = os.path.getmtime(dir_abs) + dir_node_type = "directory" + if rel_dir == "": + dir_info = "root" + elif kept_dirnames: + dir_info = "branch" + else: + dir_info = "leaf" + except OSError: + # Could not stat the directory: treat as constrained. 
def tree_dict_print(
  tree_dict: Dict[str, Dict[str, Any]]
) -> None:
  """
  Print a tree_dict as a table, one node per line, sorted by key.

  Columns, left to right:

    node_type  dir_info  mtime  [checksum]  relative path

  Details:
    - Only values are printed (no 'field=' labels); every column except
      the path is left-justified to the widest value in that column.
    - A numeric mtime is rendered with meta.z_format_mtime(); any other
      value (e.g. None) is rendered with str().
    - The checksum column appears only when at least one node carries a
      'checksum' entry; nodes without one get a blank cell.
    - The root key '' is displayed as '.'.
    - A header line 'Tree dictionary contents:' is always printed, even
      for an empty tree_dict.
  """
  rows: List[tuple[str, str, str, str, str]] = []

  for key in sorted(tree_dict):
    node = tree_dict[key]
    stamp = node.get("mtime")

    rows.append((
      str(node.get("node_type", ""))
      ,str(node.get("dir_info", ""))
      ,meta.z_format_mtime(stamp) if isinstance(stamp, (int, float)) else str(stamp)
      ,str(node["checksum"]) if "checksum" in node else ""
      ,"." if key == "" else key
    ))

  has_checksum = any("checksum" in tree_dict[key] for key in tree_dict)

  def _widest(column: int) -> int:
    # Width of the longest cell in a column; 0 for an empty table.
    return max((len(row[column]) for row in rows), default=0)

  type_w = _widest(0)
  dir_w = _widest(1)
  mtime_w = _widest(2)
  checksum_w = _widest(3) if has_checksum else 0

  print("Tree dictionary contents:")
  for kind, dir_kind, stamp_text, digest, shown_path in rows:
    cells = [
      kind.ljust(type_w)
      ,dir_kind.ljust(dir_w)
      ,stamp_text.ljust(mtime_w)
    ]
    if has_checksum:
      cells.append(digest.ljust(checksum_w))
    cells.append(shown_path)

    print(" " + " ".join(cells))
def tree_dict_in_between_and_below(
  A: Dict[str, Dict[str, Any]]
  ,B: Dict[str, Dict[str, Any]]
) -> tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]:
  """
  Partition the nodes of B into two topology-based sets relative to A.

    1. in_between:
       Nodes of B that lie under at least one directory of A, but under
       no leaf directory of A.

    2. below:
       Nodes of B that lie under at least one leaf directory of A.

  Definitions (all relative to A):

    - A directory of A is any key whose info['node_type'] == 'directory'.

    - A leaf directory of A is a directory, other than the root '', that
      has no other directory of A as a proper descendant. The root is
      never treated as a leaf.

    - "Lies under": for a key p of B, take the chain of its ancestor
      paths, always starting with the root '' and including p itself
      when p is a directory in B. Those chain members that are
      directories of A are p's ancestors in A. If any of them is a leaf,
      p goes to 'below'; otherwise, if there is at least one, p goes to
      'in_between'. With no ancestor in A at all, p is dropped.

    - B's own root (key '' or '.') is never classified.

  Returns:
    (in_between, below). Both are keyed like B. The values are the SAME
    info dict objects held by B (no copies are made), so mutating them
    mutates B.

  Side effect:
    When the 'tree_dict_in_between_and_below' debug tag is set, both
    results are merged and printed with tree_dict_print().
  """
  # 1. Directory keys of A.
  A_dir_keys: Set[str] = {
    key
    for key, info in A.items()
    if info.get("node_type") == "directory"
  }

  # 2. Leaf directories of A.
  #
  # A directory d has a directory below it exactly when some directory
  # key starts with d + os.sep, i.e. when d equals that key cut at one of
  # its separators. Collecting every such cut in one pass replaces an
  # all-pairs startswith() scan and yields the same set.
  has_dir_below: Set[str] = set()

  for d in A_dir_keys:
    cut = d.find(os.sep)
    while cut != -1:
      has_dir_below.add(d[:cut])
      cut = d.find(os.sep, cut + 1)

  leaf_dirs: Set[str] = {
    d
    for d in A_dir_keys
    if d != "" and d not in has_dir_below
  }

  in_between: Dict[str, Dict[str, Any]] = {}
  below: Dict[str, Dict[str, Any]] = {}

  # 3. Classify every non-root node of B.
  for key, info in B.items():
    if key in ("", "."):
      continue

    # A directory counts as lying under itself; any other node only
    # lies under its parents.
    parts = key.split(os.sep)
    if info.get("node_type") != "directory":
      parts = parts[:-1]

    ancestors: List[str] = [""]
    prefix = None
    for part in parts:
      prefix = part if prefix is None else os.path.join(prefix, part)
      ancestors.append(prefix)

    ancestors_in_A = [d for d in ancestors if d in A_dir_keys]

    if not ancestors_in_A:
      # Not under any directory of A; ignore.
      continue

    if any(d in leaf_dirs for d in ancestors_in_A):
      below[key] = info
    else:
      in_between[key] = info

  if meta.debug_has("tree_dict_in_between_and_below"):
    merged: Dict[str, Dict[str, Any]] = {}
    merged.update(in_between)
    merged.update(below)
    tree_dict_print(merged)

  return in_between, below
def tree_dict_older(
  A: Dict[str, Dict[str, Any]]
  ,B: Dict[str, Dict[str, Any]]
) -> Dict[str, Dict[str, Any]]:
  """
  Return the nodes of B that are older than the same-keyed node of A.

  A key k is included, with value B[k] (the same object, not a copy),
  when all of the following hold:

    - k is present in both A and B,
    - neither node's 'mtime' is missing or None,
    - B[k]['mtime'] < A[k]['mtime'].

  Keys present only in B are ignored, as are nodes whose mtime could not
  be determined on either side.

  Side effect:
    When the 'tree_dict_older' debug tag is set, the result is printed
    with tree_dict_print().
  """
  result: Dict[str, Dict[str, Any]] = {}

  for key, info_B in B.items():
    info_A = A.get(key)
    if info_A is None:
      continue

    mtime_A = info_A.get("mtime")
    mtime_B = info_B.get("mtime")

    # Without a timestamp on both sides there is nothing to compare.
    if mtime_A is None or mtime_B is None:
      continue

    if mtime_B < mtime_A:
      result[key] = info_B

  if meta.debug_has("tree_dict_older"):
    tree_dict_print(result)

  return result
+ 'import') can surface them as "not handled automatically". + + Notes: + - Only file nodes participate in mtime comparisons. + - Nodes with node_type == 'constrained' are passed through without + mtime checks, so that callers can report them separately. + """ + in_between, _below = tree_dict_in_between_and_below(A, B) + + result: TreeDict = {} + + for path, b_info in in_between.items(): + b_type = b_info.get("node_type") + + # Constrained nodes: always surface so the caller can list them + # under "not handled automatically". + if b_type == "constrained": + result[path] = b_info + continue + + # We only do "newer" semantics for regular files. + if b_type != "file": + continue + + b_mtime = b_info.get("mtime") + a_info = A.get(path) + + # Case 1: path not in A at all -> include (new file in in-between) + if a_info is None: + result[path] = b_info + continue + + a_type = a_info.get("node_type") + + # Case 2: A has non-file, B has file -> include + if a_type != "file": + result[path] = b_info + continue + + # Case 3: both are files; compare mtime + a_mtime = a_info.get("mtime") + if ( + isinstance(a_mtime, (int, float)) + and isinstance(b_mtime, (int, float)) + and b_mtime > a_mtime + ): + result[path] = b_info + + if meta.debug_has("in_between_newer"): + tree_dict_print(result) + + return result diff --git a/tool/skeleton_compare_source/temp.txt b/tool/skeleton_compare_source/temp.txt new file mode 100644 index 0000000..fb4aebf --- /dev/null +++ b/tool/skeleton_compare_source/temp.txt @@ -0,0 +1,36 @@ +Perhaps we are now ready to summit. + +We will add the module 'command.py' + +functions: help, + +The top level function, `dispatch`, will take the list. It will then dispatch a distinct function for each command. + +Each command specific function will use the functions we have written to accomplish the work of the command. See notes below. 
+ +``` +Usage: + check <command>* [<other>] + +Where: + <command> :: <help> | <no_other> | <has_other> + + <help> :: version | help | usage + <no_other> :: environment + <has_other> :: structure | import | export | suspicious | addendum | all +``` +The meaning of these commands: + +structure: differences in directory structure, directories in A - B + +import: a list of shell copy commands that would copy newer nodes in B into A, or nodes in B that are not in A. Editing this list, then running the shell commands will be helpful for updating the Harmony skeleton (A). + +export: a list of shell copy commands that would copy newer nodes in A into B, or nodes in A that are not in B. Comes from the older list. Editing this list, then running the shell commands will be helpful for keeping the skeleton in B up to date. + +suspicious: basically the InBetween list. Why did the user put things into areas that are part of the skeleton instead of under leaf directories? Tools, source code, tests, etc. all have leaf directories for adding things to. What are these files? Perhaps proposed additions to the skeleton? Perhaps bad usage of the skeleton (in general use scenarios this is more likely). + +addendum: this is the work that has been added to the project in proper places. It is interesting to see the work that has been done on the project. + +all: runs structure, import, export, suspicious, and addendum + +It seems I left an analysis out, but it doesn't come to mind at the moment. Can you think of it?