1
0
mirror of https://github.com/tw93/Mole.git synced 2026-03-22 15:00:07 +00:00

perf(clean): speed up scan phase across all modules

- Parallelize project cache indicator checks and root scans in caches.sh
  (sequential find per $HOME/* dir → throttled background jobs)
- Pre-compute project name cache before purge display loop, eliminating
  O(N²) filesystem traversals in get_artifact_display_name
- Replace basename/dirname subshells with parameter expansion in
  get_project_name/get_project_path hot loops; compute the depth counter
  as the string-length difference before and after stripping '/' from the
  path, instead of running an echo|tr|wc|tr pipeline
- Eliminate mktemp/awk/rm per call in app_support_item_size_bytes,
  use command substitution to capture du output directly
- Defer the tr lowercase conversion in clean_application_support_logs until
  the first 3 protection checks have failed; replace basename with ${var##*/}
- Optimize trash item counting: find -print0|tr -dc '\0'|wc -c counts the
  NUL terminators (one per item) and avoids the per-file fork from
  -exec printf
- Add -maxdepth 5 to /private/var/folders find (X dir is always at
  depth 3, code_sign_clone at depth 5 max; verified on real machine)
This commit is contained in:
Tw93
2026-03-21 15:19:32 +08:00
parent 694c191c6f
commit 4f07a5aa0a
4 changed files with 108 additions and 51 deletions

View File

@@ -129,12 +129,18 @@ discover_project_cache_roots() {
[[ -d "$root" ]] && roots+=("$root")
done < <(mole_purge_read_paths_config "$HOME/.config/mole/purge_paths")
local _indicator_tmp
_indicator_tmp=$(create_temp_file)
local -a _indicator_pids=()
local _max_jobs
_max_jobs=$(get_optimal_parallel_jobs scan)
local dir
local base
for dir in "$HOME"/*/; do
[[ -d "$dir" ]] || continue
dir="${dir%/}"
base=$(basename "$dir")
base="${dir##*/}"
case "$base" in
.* | Library | Applications | Movies | Music | Pictures | Public)
@@ -142,10 +148,23 @@ discover_project_cache_roots() {
;;
esac
if project_cache_has_indicators "$dir" 5; then
roots+=("$dir")
(project_cache_has_indicators "$dir" 5 && echo "$dir" >> "$_indicator_tmp") &
_indicator_pids+=($!)
if [[ ${#_indicator_pids[@]} -ge $_max_jobs ]]; then
wait "${_indicator_pids[0]}" 2> /dev/null || true
_indicator_pids=("${_indicator_pids[@]:1}")
fi
done
for _pid in "${_indicator_pids[@]}"; do
wait "$_pid" 2> /dev/null || true
done
local _found_dir
while IFS= read -r _found_dir; do
[[ -n "$_found_dir" ]] && roots+=("$_found_dir")
done < "$_indicator_tmp"
rm -f "$_indicator_tmp"
[[ ${#roots[@]} -eq 0 ]] && return 0
@@ -211,8 +230,13 @@ clean_project_caches() {
start_inline_spinner "Searching project caches..."
fi
local -a _scan_pids=()
for root in "${scan_roots[@]}"; do
scan_project_cache_root "$root" "$matches_tmp_file"
scan_project_cache_root "$root" "$matches_tmp_file" &
_scan_pids+=($!)
done
for _pid in "${_scan_pids[@]}"; do
wait "$_pid" 2> /dev/null || true
done
if [[ -t 1 ]]; then

View File

@@ -1089,8 +1089,8 @@ clean_project_artifacts() {
get_project_name() {
local path="$1"
local current_dir
current_dir=$(dirname "$path")
local current_dir="${path%/*}"
[[ -z "$current_dir" ]] && current_dir="/"
local monorepo_root=""
local project_root=""
@@ -1123,20 +1123,23 @@ clean_project_artifacts() {
# If we found project but still checking for monorepo above
# (only stop if we're beyond reasonable depth)
local depth=$(echo "${current_dir#"$HOME"}" | LC_ALL=C tr -cd '/' | wc -c | tr -d ' ')
local _rel="${current_dir#"$HOME"}"
local _stripped="${_rel//\//}"
local depth=$((${#_rel} - ${#_stripped}))
if [[ -n "$project_root" && $depth -lt 2 ]]; then
break
fi
current_dir=$(dirname "$current_dir")
local _parent="${current_dir%/*}"
current_dir="${_parent:-/}"
done
# Determine result: monorepo > project > fallback
local result=""
if [[ -n "$monorepo_root" ]]; then
result=$(basename "$monorepo_root")
result="${monorepo_root##*/}"
elif [[ -n "$project_root" ]]; then
result=$(basename "$project_root")
result="${project_root##*/}"
else
# Fallback: first directory under search root
local search_roots=()
@@ -1149,14 +1152,16 @@ clean_project_artifacts() {
root="${root%/}"
if [[ -n "$root" && "$path" == "$root/"* ]]; then
local relative_path="${path#"$root"/}"
result=$(echo "$relative_path" | cut -d'/' -f1)
result="${relative_path%%/*}"
break
fi
done
# Final fallback: use grandparent directory
if [[ -z "$result" ]]; then
result=$(dirname "$(dirname "$path")" | xargs basename)
local _gp="${path%/*}"
_gp="${_gp%/*}"
result="${_gp##*/}"
fi
fi
@@ -1170,8 +1175,8 @@ clean_project_artifacts() {
get_project_path() {
local path="$1"
local current_dir
current_dir=$(dirname "$path")
local current_dir="${path%/*}"
[[ -z "$current_dir" ]] && current_dir="/"
local monorepo_root=""
local project_root=""
@@ -1203,12 +1208,15 @@ clean_project_artifacts() {
fi
# If we found project but still checking for monorepo above
local depth=$(echo "${current_dir#"$HOME"}" | LC_ALL=C tr -cd '/' | wc -c | tr -d ' ')
local _rel="${current_dir#"$HOME"}"
local _stripped="${_rel//\//}"
local depth=$((${#_rel} - ${#_stripped}))
if [[ -n "$project_root" && $depth -lt 2 ]]; then
break
fi
current_dir=$(dirname "$current_dir")
local _parent="${current_dir%/*}"
current_dir="${_parent:-/}"
done
# Determine result: monorepo > project > fallback
@@ -1219,7 +1227,7 @@ clean_project_artifacts() {
result="$project_root"
else
# Fallback: use parent directory of artifact
result=$(dirname "$path")
result="${path%/*}"
fi
# Convert to ~ format for cleaner display
@@ -1229,23 +1237,48 @@ clean_project_artifacts() {
# Helper to get artifact display name
# For duplicate artifact names within same project, include parent directory for context
# Uses pre-computed _cached_basenames and _cached_project_names arrays when available.
get_artifact_display_name() {
local path="$1"
local artifact_name=$(basename "$path")
local project_name=$(get_project_name "$path")
local parent_name=$(basename "$(dirname "$path")")
local artifact_name="${path##*/}"
local parent_name="${path%/*}"
parent_name="${parent_name##*/}"
local project_name
if [[ -n "${_cached_project_names[*]+x}" ]]; then
# Fast path: use pre-computed cache
local _idx
project_name=""
for _idx in "${!safe_to_clean[@]}"; do
if [[ "${safe_to_clean[$_idx]}" == "$path" ]]; then
project_name="${_cached_project_names[$_idx]}"
break
fi
done
else
project_name=$(get_project_name "$path")
fi
# Check if there are other items with same artifact name AND same project
local has_duplicate=false
for other_item in "${safe_to_clean[@]}"; do
if [[ "$other_item" != "$path" && "$(basename "$other_item")" == "$artifact_name" ]]; then
# Same artifact name, check if same project
if [[ "$(get_project_name "$other_item")" == "$project_name" ]]; then
if [[ -n "${_cached_basenames[*]+x}" ]]; then
local _idx
for _idx in "${!safe_to_clean[@]}"; do
if [[ "${safe_to_clean[$_idx]}" != "$path" && "${_cached_basenames[$_idx]}" == "$artifact_name" && "${_cached_project_names[$_idx]}" == "$project_name" ]]; then
has_duplicate=true
break
fi
fi
done
done
else
for other_item in "${safe_to_clean[@]}"; do
if [[ "$other_item" != "$path" && "${other_item##*/}" == "$artifact_name" ]]; then
if [[ "$(get_project_name "$other_item")" == "$project_name" ]]; then
has_duplicate=true
break
fi
fi
done
fi
# If duplicate exists in same project and parent is not the project itself, show parent/artifact
if [[ "$has_duplicate" == "true" && "$parent_name" != "$project_name" && "$parent_name" != "." && "$parent_name" != "/" ]]; then
@@ -1295,6 +1328,16 @@ clean_project_artifacts() {
# Format: "project_path size | artifact_type"
printf "%-*s %9s | %-*s" "$printf_width" "$truncated_path" "$size_str" "$artifact_col" "$artifact_type"
}
# Pre-compute basenames and project names once so get_artifact_display_name()
# can avoid repeated filesystem traversals during the O(N^2) duplicate check.
local -a _cached_basenames=()
local -a _cached_project_names=()
local _pre_idx
for _pre_idx in "${!safe_to_clean[@]}"; do
_cached_basenames[_pre_idx]="${safe_to_clean[$_pre_idx]##*/}"
_cached_project_names[_pre_idx]=$(get_project_name "${safe_to_clean[$_pre_idx]}")
done
# Build menu options - one line per artifact
# Pass 1: collect data into parallel arrays (needed for pre-scan of widths).
# Sizes are read from pre-computed results (parallel du calls launched above).

View File

@@ -155,7 +155,7 @@ clean_deep_system() {
if safe_sudo_remove "$cache_dir"; then
code_sign_cleaned=$((code_sign_cleaned + 1))
fi
done < <(run_with_timeout 5 command find /private/var/folders -type d -name "*.code_sign_clone" -path "*/X/*" -print0 2> /dev/null || true)
done < <(run_with_timeout 5 command find /private/var/folders -maxdepth 5 -type d -name "*.code_sign_clone" -path "*/X/*" -print0 2> /dev/null || true)
stop_section_spinner
[[ $code_sign_cleaned -gt 0 ]] && log_success "Browser code signature caches, $code_sign_cleaned items"

View File

@@ -13,15 +13,15 @@ clean_user_essentials() {
local trash_count_status=0
# Skip AppleScript during tests to avoid permission dialogs
if [[ "${MOLE_TEST_MODE:-0}" == "1" || "${MOLE_TEST_NO_AUTH:-0}" == "1" ]]; then
trash_count=$(command find "$HOME/.Trash" -mindepth 1 -maxdepth 1 -exec printf '.' ';' 2> /dev/null |
wc -c | awk '{print $1}' || echo "0")
trash_count=$(command find "$HOME/.Trash" -mindepth 1 -maxdepth 1 -print0 2> /dev/null |
tr -dc '\0' | wc -c | tr -d ' ' || echo "0")
else
trash_count=$(run_with_timeout 3 osascript -e 'tell application "Finder" to count items in trash' 2> /dev/null) || trash_count_status=$?
fi
if [[ $trash_count_status -eq 124 ]]; then
debug_log "Finder trash count timed out, using direct .Trash scan"
trash_count=$(command find "$HOME/.Trash" -mindepth 1 -maxdepth 1 -exec printf '.' ';' 2> /dev/null |
wc -c | awk '{print $1}' || echo "0")
trash_count=$(command find "$HOME/.Trash" -mindepth 1 -maxdepth 1 -print0 2> /dev/null |
tr -dc '\0' | wc -c | tr -d ' ' || echo "0")
fi
[[ "$trash_count" =~ ^[0-9]+$ ]] || trash_count="0"
@@ -1093,24 +1093,13 @@ app_support_item_size_bytes() {
return 1
fi
local du_tmp
du_tmp=$(mktemp)
local du_status=0
local du_output
# Use stricter timeout for directories
if run_with_timeout "$timeout_seconds" du -skP "$item" > "$du_tmp" 2> /dev/null; then
du_status=0
else
du_status=$?
fi
if [[ $du_status -ne 0 ]]; then
rm -f "$du_tmp"
if ! du_output=$(run_with_timeout "$timeout_seconds" du -skP "$item" 2> /dev/null); then
return 1
fi
local size_kb
size_kb=$(awk 'NR==1 {print $1; exit}' "$du_tmp")
rm -f "$du_tmp"
local size_kb="${du_output%%[^0-9]*}"
[[ "$size_kb" =~ ^[0-9]+$ ]] || return 1
printf '%s\n' "$((size_kb * 1024))"
return 0
@@ -1155,12 +1144,9 @@ clean_application_support_logs() {
last_progress_update=$(get_epoch_seconds)
for app_dir in ~/Library/Application\ Support/*; do
[[ -d "$app_dir" ]] || continue
local app_name
app_name=$(basename "$app_dir")
local app_name="${app_dir##*/}"
app_count=$((app_count + 1))
update_progress_if_needed "$app_count" "$total_apps" last_progress_update 1 || true
local app_name_lower
app_name_lower=$(echo "$app_name" | LC_ALL=C tr '[:upper:]' '[:lower:]')
local is_protected=false
if is_path_whitelisted "$app_dir" 2> /dev/null; then
is_protected=true
@@ -1168,8 +1154,12 @@ clean_application_support_logs() {
is_protected=true
elif should_protect_data "$app_name"; then
is_protected=true
elif should_protect_data "$app_name_lower"; then
is_protected=true
else
local app_name_lower
app_name_lower=$(echo "$app_name" | LC_ALL=C tr '[:upper:]' '[:lower:]')
if should_protect_data "$app_name_lower"; then
is_protected=true
fi
fi
if [[ "$is_protected" == "true" ]]; then
continue