mirror of
https://github.com/tw93/Mole.git
synced 2026-03-22 17:55:08 +00:00
feat(clean): add storage clues after large files
This commit is contained in:
442
bin/clean.sh
442
bin/clean.sh
@@ -121,6 +121,13 @@ SECTION_ACTIVITY=0
|
||||
# Run-wide cleanup counters.
# NOTE(review): their writers are outside this excerpt - confirm usage there.
files_cleaned=0
total_size_cleaned=0
whitelist_skipped_count=0

# Shared state for the project-artifact hint. probe_project_artifact_hints
# resets all of these, record_project_artifact_hint accumulates into them, and
# show_project_artifact_hint_notice renders them.
PROJECT_ARTIFACT_HINT_DETECTED=false       # true when at least one candidate was recorded
PROJECT_ARTIFACT_HINT_COUNT=0              # number of candidate directories recorded
PROJECT_ARTIFACT_HINT_TRUNCATED=false      # true when a scan budget or the display cap was exceeded
PROJECT_ARTIFACT_HINT_EXAMPLES=()          # first few candidate paths, for display
PROJECT_ARTIFACT_HINT_ESTIMATED_KB=0       # summed du size (KB) of the sampled candidates
PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES=0   # number of candidates successfully sized
PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=false # true when some candidates were not sized
|
||||
|
||||
# shellcheck disable=SC2329
|
||||
note_activity() {
|
||||
@@ -294,6 +301,427 @@ classify_cleanup_risk() {
|
||||
echo "MEDIUM|User data files"
|
||||
}
|
||||
|
||||
# Quick reminder probe for project build artifacts handled by `mo purge`.
# Designed to be fast: shallow directory checks plus a few time-boxed `du`
# samples for a size estimate; no recursive find scans.
|
||||
# shellcheck disable=SC2329
|
||||
#######################################
# Print the directories the quick project-artifact probe should scan.
# Entries come from ~/.config/mole/purge_paths (one path per line; blank lines
# and lines starting with '#' are ignored, surrounding whitespace is trimmed).
# When that file is missing, unreadable, or yields no entries, a built-in list
# of common project folders under $HOME is used instead.
# Globals:   HOME (read)
# Arguments: none
# Outputs:   one path per line on stdout
# Returns:   0
#######################################
load_quick_purge_hint_paths() {
    local config_file="$HOME/.config/mole/purge_paths"
    local -a paths=()
    local line

    if [[ -f "$config_file" && -r "$config_file" ]]; then
        # `|| [[ -n "$line" ]]` keeps a last entry that has no trailing newline.
        while IFS= read -r line || [[ -n "$line" ]]; do
            # Trim leading, then trailing whitespace (this also drops a CR).
            line="${line#"${line%%[![:space:]]*}"}"
            line="${line%"${line##*[![:space:]]}"}"

            if [[ -z "$line" || "$line" == "#"* ]]; then
                continue
            fi

            # Expand only a bare "~" or a "~/" prefix. "~user/..." is left
            # untouched instead of being glued onto $HOME, and no "~" appears
            # inside a ${var/pattern/...} expansion, whose tilde handling
            # differs between Bash versions.
            if [[ "$line" == "~" || "$line" == "~/"* ]]; then
                line="${HOME}${line:1}"
            fi

            paths+=("$line")
        done < "$config_file"
    fi

    if [[ ${#paths[@]} -eq 0 ]]; then
        paths=(
            "$HOME/www"
            "$HOME/dev"
            "$HOME/Projects"
            "$HOME/GitHub"
            "$HOME/Code"
            "$HOME/Workspace"
            "$HOME/Repos"
            "$HOME/Development"
        )
    fi

    # The array is never empty here: the defaults above guarantee entries.
    printf '%s\n' "${paths[@]}"
}
|
||||
|
||||
# shellcheck disable=SC2329
|
||||
#######################################
# Register one project-artifact candidate directory.
# Always bumps the candidate counter, keeps the first two candidates as
# display examples (with $HOME abbreviated to "~"), and sizes at most three
# candidates with a time-boxed `du` so the probe stays quick. Any candidate
# that is skipped or cannot be sized marks the estimate as partial.
# Globals:   HOME (read)
#            PROJECT_ARTIFACT_HINT_COUNT, PROJECT_ARTIFACT_HINT_EXAMPLES,
#            PROJECT_ARTIFACT_HINT_ESTIMATED_KB,
#            PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES,
#            PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL (written)
# Arguments: $1 - path of the candidate directory
# Returns:   0
#######################################
record_project_artifact_hint() {
    local path="$1"
    local sample_max=3
    local timeout_seconds="0.8"

    # Plain assignments instead of ((var++)): an arithmetic command returns
    # status 1 when its value is 0, which would abort callers under `set -e`.
    PROJECT_ARTIFACT_HINT_COUNT=$((PROJECT_ARTIFACT_HINT_COUNT + 1))

    if [[ ${#PROJECT_ARTIFACT_HINT_EXAMPLES[@]} -lt 2 ]]; then
        # Abbreviate only a real $HOME prefix (exact match or followed by "/").
        # A literal prefix test avoids treating $HOME as a glob and avoids
        # putting "~" in a ${var/pat/rep} replacement, where it may be
        # tilde-expanded back to $HOME depending on the Bash version.
        local display_path="$path"
        if [[ -n "${HOME:-}" ]]; then
            if [[ "$path" == "$HOME" ]]; then
                display_path="~"
            elif [[ "$path" == "$HOME"/* ]]; then
                display_path="~${path#"$HOME"}"
            fi
        fi
        PROJECT_ARTIFACT_HINT_EXAMPLES+=("$display_path")
    fi

    # Sampling budget exhausted: count the candidate but do not size it.
    if [[ $PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES -ge $sample_max ]]; then
        PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=true
        return 0
    fi

    local du_tmp
    if ! du_tmp=$(mktemp); then
        # No scratch file available: skip sizing rather than fail the probe.
        PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=true
        return 0
    fi

    local size_kb=""
    if run_with_timeout "$timeout_seconds" du -skP "$path" > "$du_tmp" 2> /dev/null; then
        size_kb=$(awk 'NR==1 {print $1; exit}' "$du_tmp") || size_kb=""
    fi
    rm -f "$du_tmp"

    if [[ "$size_kb" =~ ^[0-9]+$ ]]; then
        PROJECT_ARTIFACT_HINT_ESTIMATED_KB=$((PROJECT_ARTIFACT_HINT_ESTIMATED_KB + size_kb))
        PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES=$((PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES + 1))
    else
        # du timed out, failed, or printed something unexpected.
        PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=true
    fi

    return 0
}
|
||||
|
||||
# shellcheck disable=SC2329
|
||||
#######################################
# Tell whether a directory itself looks like a single project.
# Mirrors purge's single-project detection so that a configured root such as
# ~/www/Pake still surfaces its direct-child artifacts (for example target/).
# Arguments: $1 - directory to inspect
# Returns:   0 if any known project marker exists directly inside it, else 1
#######################################
is_quick_purge_project_root() {
    local candidate_dir="$1"
    local marker

    # Workspace / package manifests, build files, and the VCS directory.
    for marker in \
        lerna.json pnpm-workspace.yaml nx.json rush.json \
        package.json Cargo.toml go.mod pyproject.toml requirements.txt \
        pom.xml build.gradle Gemfile composer.json pubspec.yaml \
        Makefile build.zig build.zig.zon .git; do
        [[ -e "$candidate_dir/$marker" ]] && return 0
    done

    return 1
}
|
||||
|
||||
# shellcheck disable=SC2329
|
||||
#######################################
# Shallow scan for project build artifacts that `mo purge` can remove.
# For every scan root from load_quick_purge_hint_paths this checks:
#   1. the root itself, when is_quick_purge_project_root accepts it;
#   2. each non-hidden child directory (a "project");
#   3. each non-hidden grandchild directory that is not itself an artifact
#      directory (covers monorepo-style layouts one level down).
# Every existing <dir>/<target name> is passed to record_project_artifact_hint.
# Budgets: 200 projects overall, a per-root share of that (at least 25), and
# 120 nested directories per project.
# Globals:   PROJECT_ARTIFACT_HINT_* (reset here; DETECTED and TRUNCATED are
#            set here, the rest are filled by record_project_artifact_hint)
# Arguments: none
# Returns:   0
#######################################
probe_project_artifact_hints() {
    PROJECT_ARTIFACT_HINT_DETECTED=false
    PROJECT_ARTIFACT_HINT_COUNT=0
    PROJECT_ARTIFACT_HINT_TRUNCATED=false
    PROJECT_ARTIFACT_HINT_EXAMPLES=()
    PROJECT_ARTIFACT_HINT_ESTIMATED_KB=0
    PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES=0
    PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL=false

    local max_projects=200
    local max_projects_per_root=0
    local max_nested_per_project=120
    local max_matches=12

    # Fast hint list tracks most purge targets, but excludes high-noise names
    # like `bin` and `vendor` that need deeper context checks in purge logic.
    local -a target_names=(
        "node_modules"
        "target"
        "build"
        "dist"
        "venv"
        ".venv"
        ".pytest_cache"
        ".mypy_cache"
        ".tox"
        ".nox"
        ".ruff_cache"
        ".gradle"
        "__pycache__"
        ".next"
        ".nuxt"
        ".output"
        "obj"
        ".turbo"
        ".parcel-cache"
        ".dart_tool"
        ".zig-cache"
        "zig-out"
        ".angular"
        ".svelte-kit"
        ".astro"
        "coverage"
        "DerivedData"
        "Pods"
        ".cxx"
        ".expo"
    )

    local -a scan_roots=()
    local scan_path
    while IFS= read -r scan_path; do
        if [[ -n "$scan_path" ]]; then
            scan_roots+=("$scan_path")
        fi
    done < <(load_quick_purge_hint_paths)

    if [[ ${#scan_roots[@]} -eq 0 ]]; then
        return 0
    fi

    # Fairness: avoid one very large root exhausting the entire scan budget.
    if [[ $max_projects_per_root -le 0 ]]; then
        max_projects_per_root=$(((max_projects + ${#scan_roots[@]} - 1) / ${#scan_roots[@]}))
        if [[ $max_projects_per_root -lt 25 ]]; then
            max_projects_per_root=25
        fi
    fi
    if [[ $max_projects_per_root -gt $max_projects ]]; then
        max_projects_per_root=$max_projects
    fi

    # nullglob makes "$dir"/*/ expand to nothing for directories without
    # children; the caller's setting is restored after the scan.
    local nullglob_was_set=0
    if shopt -q nullglob; then
        nullglob_was_set=1
    fi
    shopt -s nullglob

    local scanned_projects=0
    local root_projects_scanned=0
    local nested_count=0
    local stop_scan=false
    local root project_dir nested_dir

    for root in "${scan_roots[@]}"; do
        [[ -d "$root" ]] || continue
        root_projects_scanned=0

        if is_quick_purge_project_root "$root"; then
            # Counters use plain assignment: ((n++)) returns status 1 when n
            # was 0 and would abort the script under `set -e`.
            scanned_projects=$((scanned_projects + 1))
            root_projects_scanned=$((root_projects_scanned + 1))
            if [[ $scanned_projects -gt $max_projects ]]; then
                PROJECT_ARTIFACT_HINT_TRUNCATED=true
                break
            fi
            _probe_record_artifact_dirs_in "$root" "${target_names[@]}"
        fi

        if [[ $root_projects_scanned -ge $max_projects_per_root ]]; then
            PROJECT_ARTIFACT_HINT_TRUNCATED=true
            continue
        fi

        for project_dir in "$root"/*/; do
            [[ -d "$project_dir" ]] || continue
            project_dir="${project_dir%/}"

            # ${var##*/} is the basename; it avoids one fork per directory.
            [[ "${project_dir##*/}" == .* ]] && continue

            if [[ $root_projects_scanned -ge $max_projects_per_root ]]; then
                PROJECT_ARTIFACT_HINT_TRUNCATED=true
                break
            fi

            scanned_projects=$((scanned_projects + 1))
            root_projects_scanned=$((root_projects_scanned + 1))
            if [[ $scanned_projects -gt $max_projects ]]; then
                PROJECT_ARTIFACT_HINT_TRUNCATED=true
                stop_scan=true
                break
            fi

            _probe_record_artifact_dirs_in "$project_dir" "${target_names[@]}"

            nested_count=0
            for nested_dir in "$project_dir"/*/; do
                [[ -d "$nested_dir" ]] || continue
                nested_dir="${nested_dir%/}"

                # Skip hidden directories and artifact directories themselves;
                # neither counts against the nested budget.
                case "${nested_dir##*/}" in
                    .* | node_modules | target | build | dist | DerivedData | Pods)
                        continue
                        ;;
                esac

                nested_count=$((nested_count + 1))
                if [[ $nested_count -gt $max_nested_per_project ]]; then
                    break
                fi

                _probe_record_artifact_dirs_in "$nested_dir" "${target_names[@]}"
            done
        done

        # Global budget exhausted inside the project loop: stop all roots.
        if [[ "$stop_scan" == "true" ]]; then
            break
        fi
    done

    if [[ $nullglob_was_set -eq 0 ]]; then
        shopt -u nullglob
    fi

    if [[ $PROJECT_ARTIFACT_HINT_COUNT -gt 0 ]]; then
        PROJECT_ARTIFACT_HINT_DETECTED=true
    fi

    # Preserve a compact display hint if candidate count is large, but do not
    # stop scanning early solely because we exceeded this threshold.
    if [[ $PROJECT_ARTIFACT_HINT_COUNT -gt $max_matches ]]; then
        PROJECT_ARTIFACT_HINT_TRUNCATED=true
    fi

    return 0
}

# Private helper for probe_project_artifact_hints: record every artifact
# directory named in $2.. that exists directly inside directory $1.
# shellcheck disable=SC2329
_probe_record_artifact_dirs_in() {
    local parent_dir="$1"
    shift

    local artifact_name
    for artifact_name in "$@"; do
        if [[ -d "$parent_dir/$artifact_name" ]]; then
            record_project_artifact_hint "$parent_dir/$artifact_name"
        fi
    done
}
|
||||
|
||||
# shellcheck disable=SC2329
|
||||
#######################################
# Print hints about well-known large folders that commonly inflate macOS
# "System Data". Each existing location is sized with a time-boxed `du`; those
# of at least 2 GB are listed (stopping after 3 hits) with their size and
# path. Locations whose sizing times out or fails are silently skipped.
# Globals:   HOME, GREEN, GRAY, NC, ICON_SUCCESS, ICON_LIST, ICON_SUBLIST,
#            ICON_REVIEW (read)
# Arguments: none
# Outputs:   the notice lines on stdout
# Returns:   0
#######################################
show_system_data_hint_notice() {
    local min_gb=2
    local timeout_seconds="0.8"
    local max_hits=3

    local threshold_kb=$((min_gb * 1024 * 1024))
    local -a clue_labels=()
    local -a clue_sizes=()
    local -a clue_paths=()

    # labels[i] describes paths[i]; keep the two arrays in the same order.
    local -a labels=(
        "Xcode DerivedData"
        "Xcode Archives"
        "iPhone backups"
        "Simulator data"
        "Docker Desktop data"
        "Mail data"
    )
    local -a paths=(
        "$HOME/Library/Developer/Xcode/DerivedData"
        "$HOME/Library/Developer/Xcode/Archives"
        "$HOME/Library/Application Support/MobileSync/Backup"
        "$HOME/Library/Developer/CoreSimulator/Devices"
        "$HOME/Library/Containers/com.docker.docker/Data"
        "$HOME/Library/Mail"
    )

    local i path du_tmp size_kb display_path
    for i in "${!paths[@]}"; do
        path="${paths[$i]}"
        [[ -d "$path" ]] || continue

        # Without a scratch file this location cannot be sized; skip it.
        du_tmp=$(mktemp) || continue

        size_kb=""
        if run_with_timeout "$timeout_seconds" du -skP "$path" > "$du_tmp" 2> /dev/null; then
            size_kb=$(awk 'NR==1 {print $1; exit}' "$du_tmp") || size_kb=""
        fi
        # Single cleanup point, reached on every path through the iteration.
        rm -f "$du_tmp"

        if [[ "$size_kb" =~ ^[0-9]+$ ]] && [[ "$size_kb" -ge "$threshold_kb" ]]; then
            # Abbreviate a literal $HOME prefix to "~" without ${var/pat/rep},
            # whose "~" replacement may be tilde-expanded back to $HOME
            # depending on the Bash version.
            display_path="$path"
            if [[ -n "${HOME:-}" && "$path" == "$HOME"/* ]]; then
                display_path="~${path#"$HOME"}"
            fi

            clue_labels+=("${labels[$i]}")
            clue_sizes+=("$size_kb")
            clue_paths+=("$display_path")

            if [[ ${#clue_labels[@]} -ge $max_hits ]]; then
                break
            fi
        fi
    done

    note_activity

    if [[ ${#clue_labels[@]} -eq 0 ]]; then
        echo -e " ${GREEN}${ICON_SUCCESS}${NC} No common System Data clues detected"
        return 0
    fi

    local human_size
    for i in "${!clue_labels[@]}"; do
        human_size=$(bytes_to_human "$((clue_sizes[$i] * 1024))")
        echo -e " ${GREEN}${ICON_LIST}${NC} ${clue_labels[$i]}: ${human_size}"
        echo -e " ${GRAY}${ICON_SUBLIST}${NC} Path: ${GRAY}${clue_paths[$i]}${NC}"
    done
    echo -e " ${GRAY}${ICON_REVIEW}${NC} Review: mo analyze, Device backups, docker system df"
}
|
||||
|
||||
# shellcheck disable=SC2329
|
||||
#######################################
# Run the project-artifact probe and print a short pointer to `mo purge`.
# Prints nothing when the probe found no candidates. Otherwise prints the
# candidate count (suffixed with "+" when the scan was truncated), a size
# estimate when at least one candidate was sized, up to two example paths,
# and the review hint.
# Globals:   PROJECT_ARTIFACT_HINT_* (read after probe_project_artifact_hints
#            fills them), GREEN, GRAY, NC, ICON_LIST, ICON_SUBLIST,
#            ICON_REVIEW (read)
# Arguments: none
# Outputs:   the notice lines on stdout
# Returns:   0
#######################################
show_project_artifact_hint_notice() {
    probe_project_artifact_hints

    [[ "$PROJECT_ARTIFACT_HINT_DETECTED" == "true" ]] || return 0

    note_activity

    local count_text="$PROJECT_ARTIFACT_HINT_COUNT"
    if [[ "$PROJECT_ARTIFACT_HINT_TRUNCATED" == "true" ]]; then
        count_text+="+"
    fi

    # Join at most the first two recorded examples with ", ".
    local examples=""
    local example_total=${#PROJECT_ARTIFACT_HINT_EXAMPLES[@]}
    if [[ $example_total -ge 1 ]]; then
        examples="${PROJECT_ARTIFACT_HINT_EXAMPLES[0]}"
    fi
    if [[ $example_total -ge 2 ]]; then
        examples="${examples}, ${PROJECT_ARTIFACT_HINT_EXAMPLES[1]}"
    fi

    # The summary always starts with "candidates"; a size clause is appended
    # only when at least one candidate was actually sized.
    local summary="candidates"
    if [[ $PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES -gt 0 ]]; then
        local size_text
        size_text=$(bytes_to_human "$((PROJECT_ARTIFACT_HINT_ESTIMATED_KB * 1024))")

        # The estimate is a lower bound when sizing was flagged partial, the
        # scan was truncated, or fewer candidates were sized than were found.
        if [[ "$PROJECT_ARTIFACT_HINT_ESTIMATE_PARTIAL" == "true" ||
            "$PROJECT_ARTIFACT_HINT_TRUNCATED" == "true" ||
            $PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES -lt $PROJECT_ARTIFACT_HINT_COUNT ]]; then
            summary+=", at least ${size_text} sampled from ${PROJECT_ARTIFACT_HINT_ESTIMATE_SAMPLES} items"
        else
            summary+=", sampled ${size_text}"
        fi
    fi

    echo -e " ${GREEN}${ICON_LIST}${NC} ${GREEN}${count_text}${NC} ${summary}"

    if [[ -n "$examples" ]]; then
        echo -e " ${GRAY}${ICON_SUBLIST}${NC} Examples: ${GRAY}${examples}${NC}"
    fi
    echo -e " ${GRAY}${ICON_REVIEW}${NC} Review: mo purge"
}
|
||||
|
||||
# shellcheck disable=SC2329
|
||||
safe_clean() {
|
||||
if [[ $# -eq 0 ]]; then
|
||||
@@ -867,7 +1295,7 @@ perform_cleanup() {
|
||||
if [[ "$DRY_RUN" == "true" ]]; then
|
||||
for pattern in "${WHITELIST_PATTERNS[@]}"; do
|
||||
[[ "$pattern" == "$FINDER_METADATA_SENTINEL" ]] && continue
|
||||
echo -e " ${GRAY}→ $pattern${NC}"
|
||||
echo -e " ${GRAY}${ICON_SUBLIST}${NC} ${GRAY}${pattern}${NC}"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
@@ -879,7 +1307,7 @@ perform_cleanup() {
|
||||
fda_status=$?
|
||||
if [[ $fda_status -eq 1 ]]; then
|
||||
echo ""
|
||||
echo -e "${GRAY}${ICON_WARNING}${NC} ${GRAY}Tip: Grant Full Disk Access to your terminal in System Settings for best results${NC}"
|
||||
echo -e "${GRAY}${ICON_REVIEW}${NC} ${GRAY}Grant Full Disk Access to your terminal in System Settings for best results${NC}"
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -976,6 +1404,16 @@ perform_cleanup() {
|
||||
check_large_file_candidates
|
||||
end_section
|
||||
|
||||
# ===== 15. System Data clues =====
|
||||
start_section "System Data clues"
|
||||
show_system_data_hint_notice
|
||||
end_section
|
||||
|
||||
# ===== 16. Project artifacts =====
|
||||
start_section "Project artifacts"
|
||||
show_project_artifact_hint_notice
|
||||
end_section
|
||||
|
||||
# ===== Final summary =====
|
||||
echo ""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user