1
0
mirror of https://github.com/tw93/Mole.git synced 2026-03-22 19:05:07 +00:00

feat(clean): add storage clues after large files

This commit is contained in:
tw93
2026-02-23 11:34:04 +08:00
parent 1169cbf198
commit 3112673ed3
4 changed files with 448 additions and 6 deletions

View File

@@ -121,6 +121,13 @@ SECTION_ACTIVITY=0
files_cleaned=0
total_size_cleaned=0
whitelist_skipped_count=0
PROJECT_ARTIFACT_HINT_DETECTED=false
PROJECT_ARTIFACT_HINT_COUNT=0
PROJECT_ARTIFACT_HINT_TRUNCATED=false
PROJECT_ARTIFACT_HINT_EXAMPLES=()
PROJECT_ARTIFACT_HINT_ESTIMATED_KB=0
PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES=0
PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=false
# shellcheck disable=SC2329
note_activity() {
@@ -294,6 +301,427 @@ classify_cleanup_risk() {
echo "MEDIUM|User data files"
}
# Quick reminder probe for project build artifacts handled by `mo purge`.
# Designed to be fast: directory discovery uses shallow existence checks only;
# the only size measurement is a small, time-boxed du sample taken by
# record_project_artifact_hint.
#
# load_quick_purge_hint_paths: print the root directories to probe, one per line.
#   Reads $HOME/.config/mole/purge_paths when it exists. For each line:
#     - surrounding whitespace is trimmed,
#     - blank lines and lines starting with '#' are skipped,
#     - a leading '~' is replaced with $HOME.
#   Falls back to a built-in list of common project folders when the file is
#   missing or yields no entries.
# Outputs: one path per line on stdout.
# shellcheck disable=SC2329
load_quick_purge_hint_paths() {
    local config_file="$HOME/.config/mole/purge_paths"
    local -a paths=()
    local line # keep the read variable out of the caller's scope

    if [[ -f "$config_file" ]]; then
        # `|| [[ -n "$line" ]]` keeps a final entry that has no trailing newline.
        while IFS= read -r line || [[ -n "$line" ]]; do
            # Trim leading, then trailing, whitespace.
            line="${line#"${line%%[![:space:]]*}"}"
            line="${line%"${line##*[![:space:]]}"}"
            if [[ -z "$line" || "$line" == "#"* ]]; then
                continue
            fi
            # Literal prefix swap: does not depend on how the shell treats a
            # tilde inside ${var/pattern/replacement}.
            if [[ "$line" == "~"* ]]; then
                line="${HOME}${line:1}"
            fi
            paths+=("$line")
        done < "$config_file"
    fi

    if [[ ${#paths[@]} -eq 0 ]]; then
        paths=(
            "$HOME/www"
            "$HOME/dev"
            "$HOME/Projects"
            "$HOME/GitHub"
            "$HOME/Code"
            "$HOME/Workspace"
            "$HOME/Repos"
            "$HOME/Development"
        )
    fi

    # The fallback above guarantees at least one entry here.
    printf '%s\n' "${paths[@]}"
}
# Record one detected artifact directory for the project-artifact hint.
# Arguments:
#   $1 - path of the artifact directory that was found
# Globals written:
#   PROJECT_ARTIFACT_HINT_COUNT            incremented for every call
#   PROJECT_ARTIFACT_HINT_EXAMPLES         first two paths ($HOME prefix shortened)
#   PROJECT_ARTIFACT_HINT_ESTIMATED_KB     running sum of sampled sizes (KB)
#   PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES number of successful size samples
#   PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL set to true when a size is skipped
#                                          or could not be measured
# At most three directories are measured, each through run_with_timeout with a
# 0.8 second limit, so the estimate is a lower bound rather than a total.
# Returns: always 0; measurement failures only mark the estimate as partial.
# shellcheck disable=SC2329
record_project_artifact_hint() {
    local path="$1"
    local sample_max=3
    local timeout_seconds="0.8"

    # Use plain assignments instead of ((var++)): an arithmetic command returns
    # status 1 when its expression evaluates to 0, which would abort a caller
    # running under `set -e` on the very first hit.
    PROJECT_ARTIFACT_HINT_COUNT=$((PROJECT_ARTIFACT_HINT_COUNT + 1))

    if [[ ${#PROJECT_ARTIFACT_HINT_EXAMPLES[@]} -lt 2 ]]; then
        PROJECT_ARTIFACT_HINT_EXAMPLES+=("${path/#$HOME/~}")
    fi

    # Sampling budget exhausted: count the hit but skip the size measurement.
    if [[ $PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES -ge $sample_max ]]; then
        PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=true
        return 0
    fi

    # du output goes through a temp file rather than a command substitution.
    local du_tmp
    if ! du_tmp=$(mktemp 2> /dev/null); then
        PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=true
        return 0
    fi

    local size_kb=""
    if run_with_timeout "$timeout_seconds" du -skP "$path" > "$du_tmp" 2> /dev/null; then
        size_kb=$(awk 'NR==1 {print $1; exit}' "$du_tmp") || size_kb=""
    fi
    rm -f "$du_tmp"

    if [[ "$size_kb" =~ ^[0-9]+$ ]]; then
        PROJECT_ARTIFACT_HINT_ESTIMATED_KB=$((PROJECT_ARTIFACT_HINT_ESTIMATED_KB + size_kb))
        PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES=$((PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES + 1))
    else
        # Timeout, du failure, or unparsable output.
        PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=true
    fi
    return 0
}
# Tell whether a directory is itself a project root.
# Arguments:
#   $1 - directory to inspect
# Returns: 0 when any well-known project marker exists directly inside the
#          directory, 1 otherwise.
# The marker list follows the single-project detection used by purge, so a
# configured root such as ~/www/Pake can still surface direct-child artifacts
# like target/.
# shellcheck disable=SC2329
is_quick_purge_project_root() {
    local candidate_dir="$1"
    local marker

    for marker in \
        "lerna.json" "pnpm-workspace.yaml" "nx.json" "rush.json" \
        "package.json" "Cargo.toml" "go.mod" "pyproject.toml" \
        "requirements.txt" "pom.xml" "build.gradle" "Gemfile" \
        "composer.json" "pubspec.yaml" "Makefile" "build.zig" \
        "build.zig.zon" ".git"; do
        [[ -e "$candidate_dir/$marker" ]] && return 0
    done

    return 1
}
# Record every artifact directory that exists directly inside one directory.
# Arguments:
#   $1    - directory to check
#   $2... - artifact directory names to look for
# Returns: always 0.
# shellcheck disable=SC2329
record_project_artifact_hints_in_dir() {
    local dir="$1"
    shift
    local target_name
    for target_name in "$@"; do
        if [[ -d "$dir/$target_name" ]]; then
            # Best-effort hint: never let a recorder failure abort the scan.
            record_project_artifact_hint "$dir/$target_name" || true
        fi
    done
    return 0
}

# Shallow scan of the configured project roots for build-artifact directories.
# Globals written:
#   PROJECT_ARTIFACT_HINT_* are reset on entry; COUNT / EXAMPLES / ESTIMATE_*
#   are then filled through record_project_artifact_hint.
#   PROJECT_ARTIFACT_HINT_DETECTED  true when at least one artifact was found
#   PROJECT_ARTIFACT_HINT_TRUNCATED true when a scan budget was hit or more
#                                   than max_matches artifacts were found
# Scan shape, per root from load_quick_purge_hint_paths:
#   1. the root itself, when is_quick_purge_project_root says it is a project;
#   2. each non-hidden child directory ("project");
#   3. each non-hidden child of a project, skipping directories that are
#      themselves artifact folders.
# Returns: always 0.
# shellcheck disable=SC2329
probe_project_artifact_hints() {
    PROJECT_ARTIFACT_HINT_DETECTED=false
    PROJECT_ARTIFACT_HINT_COUNT=0
    PROJECT_ARTIFACT_HINT_TRUNCATED=false
    PROJECT_ARTIFACT_HINT_EXAMPLES=()
    PROJECT_ARTIFACT_HINT_ESTIMATED_KB=0
    PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES=0
    PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=false

    local max_projects=200
    local max_projects_per_root=0
    local max_nested_per_project=120
    local max_matches=12

    # Fast hint list tracks most purge targets, but excludes high-noise names
    # like `bin` and `vendor` that need deeper context checks in purge logic.
    local -a target_names=(
        "node_modules"
        "target"
        "build"
        "dist"
        "venv"
        ".venv"
        ".pytest_cache"
        ".mypy_cache"
        ".tox"
        ".nox"
        ".ruff_cache"
        ".gradle"
        "__pycache__"
        ".next"
        ".nuxt"
        ".output"
        "obj"
        ".turbo"
        ".parcel-cache"
        ".dart_tool"
        ".zig-cache"
        "zig-out"
        ".angular"
        ".svelte-kit"
        ".astro"
        "coverage"
        "DerivedData"
        "Pods"
        ".cxx"
        ".expo"
    )

    local -a scan_roots=()
    local scan_path # local so the read loop does not clobber a caller's variable
    while IFS= read -r scan_path; do
        if [[ -n "$scan_path" ]]; then
            scan_roots+=("$scan_path")
        fi
    done < <(load_quick_purge_hint_paths)
    if [[ ${#scan_roots[@]} -eq 0 ]]; then
        return 0
    fi

    # Fairness: avoid one very large root exhausting the entire scan budget.
    if [[ $max_projects_per_root -le 0 ]]; then
        max_projects_per_root=$(((max_projects + ${#scan_roots[@]} - 1) / ${#scan_roots[@]}))
        if [[ $max_projects_per_root -lt 25 ]]; then
            max_projects_per_root=25
        fi
    fi
    if [[ $max_projects_per_root -gt $max_projects ]]; then
        max_projects_per_root=$max_projects
    fi

    # Unmatched globs must expand to nothing; the previous setting is restored
    # after the scan.
    local nullglob_was_set=0
    if shopt -q nullglob; then
        nullglob_was_set=1
    fi
    shopt -s nullglob

    # Counters use var=$((var + 1)) rather than ((var++)): the arithmetic
    # command returns status 1 when it evaluates to 0, which aborts under errexit.
    local scanned_projects=0
    local stop_scan=false
    local root project_dir nested_dir
    local root_projects_scanned nested_count

    for root in "${scan_roots[@]}"; do
        [[ -d "$root" ]] || continue
        root_projects_scanned=0

        if is_quick_purge_project_root "$root"; then
            scanned_projects=$((scanned_projects + 1))
            root_projects_scanned=$((root_projects_scanned + 1))
            if [[ $scanned_projects -gt $max_projects ]]; then
                PROJECT_ARTIFACT_HINT_TRUNCATED=true
                break
            fi
            record_project_artifact_hints_in_dir "$root" "${target_names[@]}"
        fi

        if [[ $root_projects_scanned -ge $max_projects_per_root ]]; then
            PROJECT_ARTIFACT_HINT_TRUNCATED=true
            continue
        fi

        for project_dir in "$root"/*/; do
            [[ -d "$project_dir" ]] || continue
            project_dir="${project_dir%/}"
            # ${var##*/} yields the last path component without forking basename.
            if [[ "${project_dir##*/}" == .* ]]; then
                continue
            fi

            if [[ $root_projects_scanned -ge $max_projects_per_root ]]; then
                PROJECT_ARTIFACT_HINT_TRUNCATED=true
                break
            fi

            scanned_projects=$((scanned_projects + 1))
            root_projects_scanned=$((root_projects_scanned + 1))
            if [[ $scanned_projects -gt $max_projects ]]; then
                PROJECT_ARTIFACT_HINT_TRUNCATED=true
                stop_scan=true
                break
            fi

            record_project_artifact_hints_in_dir "$project_dir" "${target_names[@]}"

            nested_count=0
            for nested_dir in "$project_dir"/*/; do
                [[ -d "$nested_dir" ]] || continue
                nested_dir="${nested_dir%/}"
                case "${nested_dir##*/}" in
                    # Hidden directories and artifact folders are not descended into.
                    .* | node_modules | target | build | dist | DerivedData | Pods)
                        continue
                        ;;
                esac

                nested_count=$((nested_count + 1))
                if [[ $nested_count -gt $max_nested_per_project ]]; then
                    break
                fi

                record_project_artifact_hints_in_dir "$nested_dir" "${target_names[@]}"
            done
        done

        # Global project budget exhausted inside this root: stop all scanning.
        if [[ "$stop_scan" == "true" ]]; then
            break
        fi
    done

    if [[ $nullglob_was_set -eq 0 ]]; then
        shopt -u nullglob
    fi

    if [[ $PROJECT_ARTIFACT_HINT_COUNT -gt 0 ]]; then
        PROJECT_ARTIFACT_HINT_DETECTED=true
    fi

    # Preserve a compact display hint if candidate count is large, but do not
    # stop scanning early solely because we exceeded this threshold.
    if [[ $PROJECT_ARTIFACT_HINT_COUNT -gt $max_matches ]]; then
        PROJECT_ARTIFACT_HINT_TRUNCATED=true
    fi

    return 0
}
# Print size clues for a fixed list of well-known locations under
# $HOME/Library.
# Each existing directory is measured with `du -skP` through run_with_timeout
# (0.8 s limit); directories of at least 2 GB are reported, and probing stops
# after three hits. Directories whose measurement fails or is not numeric are
# skipped silently.
# Output: either a single "nothing detected" line, or one label/size line plus
# one path line per hit followed by a review suggestion. note_activity is
# called once before anything is printed.
# shellcheck disable=SC2329
show_system_data_hint_notice() {
    local min_gb=2
    local timeout_seconds="0.8"
    local max_hits=3
    local threshold_kb=$((min_gb * 1024 * 1024))

    # labels[n] describes paths[n].
    local -a labels=(
        "Xcode DerivedData"
        "Xcode Archives"
        "iPhone backups"
        "Simulator data"
        "Docker Desktop data"
        "Mail data"
    )
    local -a paths=(
        "$HOME/Library/Developer/Xcode/DerivedData"
        "$HOME/Library/Developer/Xcode/Archives"
        "$HOME/Library/Application Support/MobileSync/Backup"
        "$HOME/Library/Developer/CoreSimulator/Devices"
        "$HOME/Library/Containers/com.docker.docker/Data"
        "$HOME/Library/Mail"
    )

    local -a hit_labels=()
    local -a hit_sizes=()
    local -a hit_paths=()

    local idx probe_path probe_tmp measured_kb
    for idx in "${!paths[@]}"; do
        probe_path="${paths[$idx]}"
        if [[ ! -d "$probe_path" ]]; then
            continue
        fi

        probe_tmp=$(mktemp)
        measured_kb=""
        if run_with_timeout "$timeout_seconds" du -skP "$probe_path" > "$probe_tmp" 2> /dev/null; then
            measured_kb=$(awk 'NR==1 {print $1; exit}' "$probe_tmp")
        fi
        rm -f "$probe_tmp"

        # Ignore failed or unparsable measurements and anything below threshold.
        if [[ ! "$measured_kb" =~ ^[0-9]+$ ]]; then
            continue
        fi
        if [[ "$measured_kb" -lt "$threshold_kb" ]]; then
            continue
        fi

        hit_labels+=("${labels[$idx]}")
        hit_sizes+=("$measured_kb")
        hit_paths+=("${probe_path/#$HOME/~}")
        if [[ ${#hit_labels[@]} -ge $max_hits ]]; then
            break
        fi
    done

    note_activity

    if [[ ${#hit_labels[@]} -eq 0 ]]; then
        echo -e " ${GREEN}${ICON_SUCCESS}${NC} No common System Data clues detected"
        return 0
    fi

    local human_size
    for idx in "${!hit_labels[@]}"; do
        human_size=$(bytes_to_human "$((hit_sizes[$idx] * 1024))")
        echo -e " ${GREEN}${ICON_LIST}${NC} ${hit_labels[$idx]}: ${human_size}"
        echo -e " ${GRAY}${ICON_SUBLIST}${NC} Path: ${GRAY}${hit_paths[$idx]}${NC}"
    done
    echo -e " ${GRAY}${ICON_REVIEW}${NC} Review: mo analyze, Device backups, docker system df"
}
# Run the project-artifact probe and print its summary.
# Prints nothing when PROJECT_ARTIFACT_HINT_DETECTED is not "true" after the
# probe. Otherwise prints:
#   - a candidate count ("+" appended when the scan was truncated), with a
#     size estimate when at least one directory was sampled;
#   - up to two example paths, when any were recorded;
#   - a pointer to `mo purge`.
# The estimate is worded as a lower bound when it is flagged partial, when the
# scan was truncated, or when fewer directories were sampled than were found.
# shellcheck disable=SC2329
show_project_artifact_hint_notice() {
    probe_project_artifact_hints
    [[ "$PROJECT_ARTIFACT_HINT_DETECTED" == "true" ]] || return 0

    note_activity

    local count_label="$PROJECT_ARTIFACT_HINT_COUNT"
    if [[ "$PROJECT_ARTIFACT_HINT_TRUNCATED" == "true" ]]; then
        count_label+="+"
    fi

    local examples=""
    case ${#PROJECT_ARTIFACT_HINT_EXAMPLES[@]} in
        0) ;;
        1) examples="${PROJECT_ARTIFACT_HINT_EXAMPLES[0]}" ;;
        *) examples="${PROJECT_ARTIFACT_HINT_EXAMPLES[0]}, ${PROJECT_ARTIFACT_HINT_EXAMPLES[1]}" ;;
    esac

    # Build the headline once, then append the size clause that applies.
    local summary=" ${GREEN}${ICON_LIST}${NC} ${GREEN}${count_label}${NC} candidates"
    if [[ $PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES -gt 0 ]]; then
        local estimate_human
        estimate_human=$(bytes_to_human "$((PROJECT_ARTIFACT_HINT_ESTIMATED_KB * 1024))")
        if [[ "$PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL" == "true" ||
            "$PROJECT_ARTIFACT_HINT_TRUNCATED" == "true" ||
            $PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES -lt $PROJECT_ARTIFACT_HINT_COUNT ]]; then
            summary+=", at least ${estimate_human} sampled from ${PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES} items"
        else
            summary+=", sampled ${estimate_human}"
        fi
    fi
    echo -e "$summary"

    if [[ -n "$examples" ]]; then
        echo -e " ${GRAY}${ICON_SUBLIST}${NC} Examples: ${GRAY}${examples}${NC}"
    fi
    echo -e " ${GRAY}${ICON_REVIEW}${NC} Review: mo purge"
}
# shellcheck disable=SC2329
safe_clean() {
if [[ $# -eq 0 ]]; then
@@ -867,7 +1295,7 @@ perform_cleanup() {
if [[ "$DRY_RUN" == "true" ]]; then
for pattern in "${WHITELIST_PATTERNS[@]}"; do
[[ "$pattern" == "$FINDER_METADATA_SENTINEL" ]] && continue
echo -e " ${GRAY}$pattern${NC}"
echo -e " ${GRAY}${ICON_SUBLIST}${NC} ${GRAY}${pattern}${NC}"
done
fi
fi
@@ -879,7 +1307,7 @@ perform_cleanup() {
fda_status=$?
if [[ $fda_status -eq 1 ]]; then
echo ""
echo -e "${GRAY}${ICON_WARNING}${NC} ${GRAY}Tip: Grant Full Disk Access to your terminal in System Settings for best results${NC}"
echo -e "${GRAY}${ICON_REVIEW}${NC} ${GRAY}Grant Full Disk Access to your terminal in System Settings for best results${NC}"
fi
fi
@@ -976,6 +1404,16 @@ perform_cleanup() {
check_large_file_candidates
end_section
# ===== 15. System Data clues =====
start_section "System Data clues"
show_system_data_hint_notice
end_section
# ===== 16. Project artifacts =====
start_section "Project artifacts"
show_project_artifact_hint_notice
end_section
# ===== Final summary =====
echo ""

View File

@@ -410,7 +410,8 @@ clean_local_snapshots() {
local snapshot_count
snapshot_count=$(echo "$snapshot_list" | { grep -Eo 'com\.apple\.TimeMachine\.[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{6}' || true; } | wc -l | awk '{print $1}')
if [[ "$snapshot_count" =~ ^[0-9]+$ && "$snapshot_count" -gt 0 ]]; then
echo -e " ${YELLOW}${ICON_WARNING}${NC} Time Machine local snapshots: ${GREEN}${snapshot_count}${NC}${GRAY}, Review: tmutil listlocalsnapshots /${NC}"
echo -e " ${YELLOW}${ICON_WARNING}${NC} Time Machine local snapshots: ${GREEN}${snapshot_count}${NC}"
echo -e " ${GRAY}${ICON_REVIEW}${NC} ${GRAY}Review: tmutil listlocalsnapshots /${NC}"
note_activity
fi
}

View File

@@ -887,7 +887,8 @@ check_large_file_candidates() {
if [[ -n "$snapshot_list" ]]; then
snapshot_count=$(echo "$snapshot_list" | { grep -Eo 'com\.apple\.TimeMachine\.[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{6}' || true; } | wc -l | awk '{print $1}')
if [[ "$snapshot_count" =~ ^[0-9]+$ && "$snapshot_count" -gt 0 ]]; then
echo -e " ${YELLOW}${ICON_WARNING}${NC} Time Machine local snapshots: ${GREEN}${snapshot_count}${NC}${GRAY}, Review: tmutil listlocalsnapshots /${NC}"
echo -e " ${YELLOW}${ICON_WARNING}${NC} Time Machine local snapshots: ${GREEN}${snapshot_count}${NC}"
echo -e " ${GRAY}${ICON_REVIEW}${NC} ${GRAY}Review: tmutil listlocalsnapshots /${NC}"
found_any=true
fi
fi
@@ -900,14 +901,14 @@ check_large_file_candidates() {
echo -e " ${YELLOW}${ICON_WARNING}${NC} Docker storage:"
while IFS=$'\t' read -r dtype dsize dreclaim; do
[[ -z "$dtype" ]] && continue
echo -e " ${GRAY} $dtype: $dsize, Reclaimable: $dreclaim${NC}"
echo -e " ${GRAY}${ICON_LIST} $dtype: $dsize, Reclaimable: $dreclaim${NC}"
done <<< "$docker_output"
found_any=true
else
docker_output=$(run_with_timeout 3 docker system df 2> /dev/null || true)
if [[ -n "$docker_output" ]]; then
echo -e " ${YELLOW}${ICON_WARNING}${NC} Docker storage:"
echo -e " ${GRAY}Run: docker system df${NC}"
echo -e " ${GRAY}${ICON_REVIEW}${NC} ${GRAY}Run: docker system df${NC}"
found_any=true
fi
fi

View File

@@ -35,8 +35,10 @@ readonly ICON_WARNING="◎"
readonly ICON_EMPTY="○"
readonly ICON_SOLID="●"
readonly ICON_LIST="•"
readonly ICON_SUBLIST="↳"
readonly ICON_ARROW="➤"
readonly ICON_DRY_RUN="→"
readonly ICON_REVIEW="☞"
readonly ICON_NAV_UP="↑"
readonly ICON_NAV_DOWN="↓"