a usable version of the skeleton check tool, though a few more tweaks to go
author Thomas Walker Lynch <eknp9n@reasoningtechnology.com>
Wed, 19 Nov 2025 15:25:35 +0000 (15:25 +0000)
committer Thomas Walker Lynch <eknp9n@reasoningtechnology.com>
Wed, 19 Nov 2025 15:25:35 +0000 (15:25 +0000)
tool/skeleton/skeleton.py

index 549de93..b8ffa70 100644 (file)
@@ -38,12 +38,52 @@ TreeDict = Dict[str, Dict[str, Any]]
 #   Traversal:
 #     - Any path (directory or file) for which GitIgnore.check(<rel_path>)
 #       returns 'Ignore' is omitted from the tree_dict.
+TreeDict = Dict[str, Dict[str, Any]]
+
+# tree_dict_make / tree_dict_print
+#
+# Build a dictionary describing a project tree, respecting GitIgnore.
+#
+# tree_dict_make(<path>, <checksum_fn>) -> tree_dict
+#
+#   <checksum_fn>(<abs_path>) -> bignum | None
+#
+#   Keys of tree_dict:
+#     - Relative paths from <path>; the root itself is stored under "".
+#
+#   Values are dicts with:
+#     1. 'mtime'     : last modification time (float seconds) or None
+#     2. 'node_type' : 'file', 'directory', 'other', or 'constrained'
+#     3. 'dir_info'  : 'NA', 'leaf', 'branch', 'root'
+#     4. 'checksum'  : present only for file nodes when checksum_fn is
+#                      not None
+#
+#   Traversal:
+#     - Any path (directory or file) for which GitIgnore.check(<rel_path>)
+#       returns 'Ignore' is omitted from tree_dict; ignored directories
+#       are not traversed further.
 def tree_dict_make(
   path: str
   ,checksum_fn: Callable[[str], int] | None
 ) -> Dict[str, Dict[str, Any]]:
   """
   Build a tree_dict for the subtree rooted at <path>, respecting GitIgnore.
+
+  Semantics (current):
+    * Any path (directory or file) for which GitIgnore.check(<rel_path>)
+      returns 'Ignore' is completely omitted from the tree_dict.
+    * The root directory ('') is always included.
+    * Directory dir_info:
+        - 'root'   for the root
+        - 'branch' for directories that have child directories
+                    (after GitIgnore filtering)
+        - 'leaf'   for directories with no child directories
+    * Non-directory dir_info:
+        - 'NA'
+    * Symlinks are classified as file/directory/other based on what
+      they point to, if accessible.
+    * If any filesystem access needed for classification/mtime raises,
+      the node is recorded as node_type='constrained', dir_info='NA',
+      mtime=None, and we do not attempt checksum.
   """
   root = os.path.abspath(path)
   gi = GitIgnore(root)
@@ -55,7 +95,7 @@ def tree_dict_make(
     if rel_dir == ".":
       rel_dir = ""
 
-    # Skip ignored directories (except the root)
+    # Skip ignored directories (except the root).
     if rel_dir != "" and gi.check(rel_dir) == "Ignore":
       dirnames[:] = []
       continue
@@ -72,15 +112,20 @@ def tree_dict_make(
 
     # Record the directory node itself
     dir_abs = dirpath
-    dir_mtime = os.path.getmtime(dir_abs)
-    dir_node_type = "directory"
-
-    if rel_dir == "":
-      dir_info = "root"
-    elif kept_dirnames:
-      dir_info = "branch"
-    else:
-      dir_info = "leaf"
+    try:
+      dir_mtime = os.path.getmtime(dir_abs)
+      dir_node_type = "directory"
+      if rel_dir == "":
+        dir_info = "root"
+      elif kept_dirnames:
+        dir_info = "branch"
+      else:
+        dir_info = "leaf"
+    except OSError:
+      # Could not stat the directory: treat as constrained.
+      dir_mtime = None
+      dir_node_type = "constrained"
+      dir_info = "NA"
 
     tree_dict[rel_dir] = {
       "mtime": dir_mtime
@@ -99,23 +144,39 @@ def tree_dict_make(
       if gi.check(rel_path) == "Ignore":
         continue
 
-      if os.path.isfile(abs_path):
-        node_type = "file"
-      elif os.path.isdir(abs_path):
-        node_type = "directory"
-      else:
-        node_type = "other"
-
-      mtime = os.path.getmtime(abs_path)
-
-      if node_type == "directory":
-        # Defensive; os.walk normally handles directories separately.
-        if rel_path == "":
-          dir_info_f = "root"
+      # Wrap classification + mtime in one try/except so any failure
+      # marks the node as constrained.
+      try:
+        if os.path.islink(abs_path):
+          # Symlink: classify by target if possible
+          if os.path.isdir(abs_path):
+            node_type = "directory"
+            dir_info_f = "branch"
+          elif os.path.isfile(abs_path):
+            node_type = "file"
+            dir_info_f = "NA"
+          else:
+            node_type = "other"
+            dir_info_f = "NA"
+          mtime = os.path.getmtime(abs_path)
         else:
-          dir_info_f = "branch"
-      else:
+          # Normal node
+          if os.path.isfile(abs_path):
+            node_type = "file"
+            dir_info_f = "NA"
+          elif os.path.isdir(abs_path):
+            node_type = "directory"
+            dir_info_f = "branch"
+          else:
+            node_type = "other"
+            dir_info_f = "NA"
+          mtime = os.path.getmtime(abs_path)
+      except OSError:
+        # Anything that blows up during classification/stat becomes
+        # constrained; we do not attempt checksum for these.
+        node_type = "constrained"
         dir_info_f = "NA"
+        mtime = None
 
       info: Dict[str, Any] = {
         "mtime": mtime
@@ -123,7 +184,7 @@ def tree_dict_make(
         ,"dir_info": dir_info_f
       }
 
-      if node_type == "file" and checksum_fn is not None:
+      if node_type == "file" and checksum_fn is not None and isinstance(mtime, (int, float)):
         info["checksum"] = checksum_fn(abs_path)
 
       tree_dict[rel_path] = info
@@ -133,7 +194,6 @@ def tree_dict_make(
 
   return tree_dict
 
-
 def tree_dict_print(
   tree_dict: Dict[str, Dict[str, Any]]
 ) -> None: