# Traversal:
# - Any path (directory or file) for which GitIgnore.check(<rel_path>)
# returns 'Ignore' is omitted from the tree_dict.
+TreeDict = Dict[str, Dict[str, Any]]
+
+# tree_dict_make / tree_dict_print
+#
+# Build a dictionary describing a project tree, respecting GitIgnore.
+#
+# tree_dict_make(<path>, <checksum_fn>) -> tree_dict
+#
+# <checksum_fn>(<abs_path>) -> bignum | None
+#
+# Keys of tree_dict:
+# - Relative paths from <path>; the root itself is stored under "".
+#
+# Values are dicts with:
+# 1. 'mtime' : last modification time (float seconds) or None
+# 2. 'node_type' : 'file', 'directory', 'other', or 'constrained'
+# 3. 'dir_info' : 'NA', 'leaf', 'branch', 'root'
+# 4. 'checksum' : present only for file nodes when checksum_fn is
+# not None
+#
+# Traversal:
+# - Directories whose relative path GitIgnore.check() marks as
# 'Ignore' are omitted from tree_dict and are not traversed.
def tree_dict_make(
path: str
,checksum_fn: Callable[[str], int] | None
) -> Dict[str, Dict[str, Any]]:
"""
Build a tree_dict for the subtree rooted at <path>, respecting GitIgnore.
+
+ Semantics (current):
+ * Any path (directory or file) for which GitIgnore.check(<rel_path>)
+ returns 'Ignore' is completely omitted from the tree_dict.
+ * The root directory ('') is always included.
+ * Directory dir_info:
+ - 'root' for the root
+ - 'branch' for directories that have child directories
+ (after GitIgnore filtering)
+ - 'leaf' for directories with no child directories
+ * Non-directory dir_info:
+ - 'NA'
+ * Symlinks are classified as file/directory/other based on what
+ they point to, if accessible.
+ * If any filesystem access needed for classification/mtime raises,
+ the node is recorded as node_type='constrained', dir_info='NA',
+ mtime=None, and we do not attempt checksum.
"""
root = os.path.abspath(path)
gi = GitIgnore(root)
if rel_dir == ".":
rel_dir = ""
- # Skip ignored directories (except the root)
+ # Skip ignored directories (except the root).
if rel_dir != "" and gi.check(rel_dir) == "Ignore":
dirnames[:] = []
continue
# Record the directory node itself
dir_abs = dirpath
- dir_mtime = os.path.getmtime(dir_abs)
- dir_node_type = "directory"
-
- if rel_dir == "":
- dir_info = "root"
- elif kept_dirnames:
- dir_info = "branch"
- else:
- dir_info = "leaf"
+ try:
+ dir_mtime = os.path.getmtime(dir_abs)
+ dir_node_type = "directory"
+ if rel_dir == "":
+ dir_info = "root"
+ elif kept_dirnames:
+ dir_info = "branch"
+ else:
+ dir_info = "leaf"
+ except OSError:
+ # Could not stat the directory: treat as constrained.
+ dir_mtime = None
+ dir_node_type = "constrained"
+ dir_info = "NA"
tree_dict[rel_dir] = {
"mtime": dir_mtime
if gi.check(rel_path) == "Ignore":
continue
- if os.path.isfile(abs_path):
- node_type = "file"
- elif os.path.isdir(abs_path):
- node_type = "directory"
- else:
- node_type = "other"
-
- mtime = os.path.getmtime(abs_path)
-
- if node_type == "directory":
- # Defensive; os.walk normally handles directories separately.
- if rel_path == "":
- dir_info_f = "root"
+ # Wrap classification + mtime in one try/except so any failure
+ # marks the node as constrained.
+ try:
+ if os.path.islink(abs_path):
+ # Symlink: classify by target if possible
+ if os.path.isdir(abs_path):
+ node_type = "directory"
+ dir_info_f = "branch"
+ elif os.path.isfile(abs_path):
+ node_type = "file"
+ dir_info_f = "NA"
+ else:
+ node_type = "other"
+ dir_info_f = "NA"
+ mtime = os.path.getmtime(abs_path)
else:
- dir_info_f = "branch"
- else:
+ # Normal node
+ if os.path.isfile(abs_path):
+ node_type = "file"
+ dir_info_f = "NA"
+ elif os.path.isdir(abs_path):
+ node_type = "directory"
+ dir_info_f = "branch"
+ else:
+ node_type = "other"
+ dir_info_f = "NA"
+ mtime = os.path.getmtime(abs_path)
+ except OSError:
+ # Anything that blows up during classification/stat becomes
+ # constrained; we do not attempt checksum for these.
+ node_type = "constrained"
dir_info_f = "NA"
+ mtime = None
info: Dict[str, Any] = {
"mtime": mtime
,"dir_info": dir_info_f
}
- if node_type == "file" and checksum_fn is not None:
+ if node_type == "file" and checksum_fn is not None and isinstance(mtime, (int, float)):
info["checksum"] = checksum_fn(abs_path)
tree_dict[rel_path] = info
return tree_dict
-
def tree_dict_print(
tree_dict: Dict[str, Dict[str, Any]]
) -> None: