From 4f07a5aa0a6da2ae6d400a3a4199d0df380b5685 Mon Sep 17 00:00:00 2001 From: Tw93 Date: Sat, 21 Mar 2026 15:19:32 +0800 Subject: [PATCH] perf(clean): speed up scan phase across all modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Parallelize project cache indicator checks and root scans in caches.sh (sequential find per $HOME/* dir → throttled background jobs) - Pre-compute project name cache before purge display loop, eliminating O(N²) filesystem traversals in get_artifact_display_name - Replace basename/dirname subshells with parameter expansion in get_project_name/get_project_path hot loops; depth counter via bash string arithmetic instead of echo|tr|wc|tr pipeline - Eliminate mktemp/awk/rm per call in app_support_item_size_bytes, use command substitution to capture du output directly - Defer tr lowercase in clean_application_support_logs to only when first 3 protection checks fail; replace basename with ${var##*/} - Optimize trash item counting: find -print0|tr -dc '\0'|wc -c avoids per-file fork from -exec printf - Add -maxdepth 5 to /private/var/folders find (X dir is always at depth 3, code_sign_clone at depth 5 max; verified on real machine) --- lib/clean/caches.sh | 32 ++++++++++++++-- lib/clean/project.sh | 87 +++++++++++++++++++++++++++++++++----------- lib/clean/system.sh | 2 +- lib/clean/user.sh | 38 +++++++------------ 4 files changed, 108 insertions(+), 51 deletions(-) diff --git a/lib/clean/caches.sh b/lib/clean/caches.sh index abda06c..bed2827 100644 --- a/lib/clean/caches.sh +++ b/lib/clean/caches.sh @@ -129,12 +129,18 @@ discover_project_cache_roots() { [[ -d "$root" ]] && roots+=("$root") done < <(mole_purge_read_paths_config "$HOME/.config/mole/purge_paths") + local _indicator_tmp + _indicator_tmp=$(create_temp_file) + local -a _indicator_pids=() + local _max_jobs + _max_jobs=$(get_optimal_parallel_jobs scan) + local dir local base for dir in "$HOME"/*/; do [[ -d "$dir" ]] || continue dir="${dir%/}" - base=$(basename "$dir") + base="${dir##*/}" case "$base" in .* | Library | Applications | Movies | Music | Pictures | Public) @@ -142,10 +148,23 @@ discover_project_cache_roots() { ;; esac - if project_cache_has_indicators "$dir" 5; then - roots+=("$dir") + (project_cache_has_indicators "$dir" 5 && echo "$dir" >> "$_indicator_tmp") & + _indicator_pids+=($!) + + if [[ ${#_indicator_pids[@]} -ge $_max_jobs ]]; then + wait "${_indicator_pids[0]}" 2> /dev/null || true + _indicator_pids=("${_indicator_pids[@]:1}") fi done + for _pid in "${_indicator_pids[@]}"; do + wait "$_pid" 2> /dev/null || true + done + + local _found_dir + while IFS= read -r _found_dir; do + [[ -n "$_found_dir" ]] && roots+=("$_found_dir") + done < "$_indicator_tmp" + rm -f "$_indicator_tmp" [[ ${#roots[@]} -eq 0 ]] && return 0 @@ -211,8 +230,13 @@ clean_project_caches() { start_inline_spinner "Searching project caches..." fi + local -a _scan_pids=() for root in "${scan_roots[@]}"; do - scan_project_cache_root "$root" "$matches_tmp_file" + scan_project_cache_root "$root" "$matches_tmp_file" & + _scan_pids+=($!) + done + for _pid in "${_scan_pids[@]}"; do + wait "$_pid" 2> /dev/null || true done if [[ -t 1 ]]; then diff --git a/lib/clean/project.sh b/lib/clean/project.sh index 0d677b9..a4cb55f 100644 --- a/lib/clean/project.sh +++ b/lib/clean/project.sh @@ -1089,8 +1089,8 @@ clean_project_artifacts() { get_project_name() { local path="$1" - local current_dir - current_dir=$(dirname "$path") + local current_dir="${path%/*}" + [[ -z "$current_dir" ]] && current_dir="/" local monorepo_root="" local project_root="" @@ -1123,20 +1123,23 @@ clean_project_artifacts() { # If we found project but still checking for monorepo above # (only stop if we're beyond reasonable depth) - local depth=$(echo "${current_dir#"$HOME"}" | LC_ALL=C tr -cd '/' | wc -c | tr -d ' ') + local _rel="${current_dir#"$HOME"}" + local _stripped="${_rel//\//}" + local depth=$((${#_rel} - ${#_stripped})) if [[ -n "$project_root" && $depth -lt 2 ]]; then break fi - current_dir=$(dirname "$current_dir") + local _parent="${current_dir%/*}" + current_dir="${_parent:-/}" done # Determine result: monorepo > project > fallback local result="" if [[ -n "$monorepo_root" ]]; then - result=$(basename "$monorepo_root") + result="${monorepo_root##*/}" elif [[ -n "$project_root" ]]; then - result=$(basename "$project_root") + result="${project_root##*/}" else # Fallback: first directory under search root local search_roots=() @@ -1149,14 +1152,16 @@ clean_project_artifacts() { root="${root%/}" if [[ -n "$root" && "$path" == "$root/"* ]]; then local relative_path="${path#"$root"/}" - result=$(echo "$relative_path" | cut -d'/' -f1) + result="${relative_path%%/*}" break fi done # Final fallback: use grandparent directory if [[ -z "$result" ]]; then - result=$(dirname "$(dirname "$path")" | xargs basename) + local _gp="${path%/*}" + _gp="${_gp%/*}" + result="${_gp##*/}" fi fi @@ -1170,8 +1175,8 @@ clean_project_artifacts() { get_project_path() { local path="$1" - local current_dir - current_dir=$(dirname "$path") + local current_dir="${path%/*}" + [[ -z "$current_dir" ]] && current_dir="/" local monorepo_root="" local project_root="" @@ -1203,12 +1208,15 @@ clean_project_artifacts() { fi # If we found project but still checking for monorepo above - local depth=$(echo "${current_dir#"$HOME"}" | LC_ALL=C tr -cd '/' | wc -c | tr -d ' ') + local _rel="${current_dir#"$HOME"}" + local _stripped="${_rel//\//}" + local depth=$((${#_rel} - ${#_stripped})) if [[ -n "$project_root" && $depth -lt 2 ]]; then break fi - current_dir=$(dirname "$current_dir") + local _parent="${current_dir%/*}" + current_dir="${_parent:-/}" done # Determine result: monorepo > project > fallback @@ -1219,7 +1227,7 @@ clean_project_artifacts() { result="$project_root" else # Fallback: use parent directory of artifact - result=$(dirname "$path") + result="${path%/*}" fi # Convert to ~ format for cleaner display @@ -1229,23 +1237,48 @@ clean_project_artifacts() { # Helper to get artifact display name # For duplicate artifact names within same project, include parent directory for context + # Uses pre-computed _cached_basenames and _cached_project_names arrays when available. get_artifact_display_name() { local path="$1" - local artifact_name=$(basename "$path") - local project_name=$(get_project_name "$path") - local parent_name=$(basename "$(dirname "$path")") + local artifact_name="${path##*/}" + local parent_name="${path%/*}" + parent_name="${parent_name##*/}" + + local project_name + if [[ -n "${_cached_project_names[*]+x}" ]]; then + # Fast path: use pre-computed cache + local _idx + project_name="" + for _idx in "${!safe_to_clean[@]}"; do + if [[ "${safe_to_clean[$_idx]}" == "$path" ]]; then + project_name="${_cached_project_names[$_idx]}" + break + fi + done + else + project_name=$(get_project_name "$path") + fi # Check if there are other items with same artifact name AND same project local has_duplicate=false - for other_item in "${safe_to_clean[@]}"; do - if [[ "$other_item" != "$path" && "$(basename "$other_item")" == "$artifact_name" ]]; then - # Same artifact name, check if same project - if [[ "$(get_project_name "$other_item")" == "$project_name" ]]; then + if [[ -n "${_cached_basenames[*]+x}" ]]; then + local _idx + for _idx in "${!safe_to_clean[@]}"; do + if [[ "${safe_to_clean[$_idx]}" != "$path" && "${_cached_basenames[$_idx]}" == "$artifact_name" && "${_cached_project_names[$_idx]}" == "$project_name" ]]; then has_duplicate=true break fi - fi - done + done + else + for other_item in "${safe_to_clean[@]}"; do + if [[ "$other_item" != "$path" && "${other_item##*/}" == "$artifact_name" ]]; then + if [[ "$(get_project_name "$other_item")" == "$project_name" ]]; then + has_duplicate=true + break + fi + fi + done + fi # If duplicate exists in same project and parent is not the project itself, show parent/artifact if [[ "$has_duplicate" == "true" && "$parent_name" != "$project_name" && "$parent_name" != "." && "$parent_name" != "/" ]]; then @@ -1295,6 +1328,16 @@ clean_project_artifacts() { # Format: "project_path size | artifact_type" printf "%-*s %9s | %-*s" "$printf_width" "$truncated_path" "$size_str" "$artifact_col" "$artifact_type" } + # Pre-compute basenames and project names once so get_artifact_display_name() + # can avoid repeated filesystem traversals during the O(N^2) duplicate check. + local -a _cached_basenames=() + local -a _cached_project_names=() + local _pre_idx + for _pre_idx in "${!safe_to_clean[@]}"; do + _cached_basenames[_pre_idx]="${safe_to_clean[$_pre_idx]##*/}" + _cached_project_names[_pre_idx]=$(get_project_name "${safe_to_clean[$_pre_idx]}") + done + # Build menu options - one line per artifact # Pass 1: collect data into parallel arrays (needed for pre-scan of widths). # Sizes are read from pre-computed results (parallel du calls launched above). diff --git a/lib/clean/system.sh b/lib/clean/system.sh index 817964e..27a3065 100644 --- a/lib/clean/system.sh +++ b/lib/clean/system.sh @@ -155,7 +155,7 @@ clean_deep_system() { if safe_sudo_remove "$cache_dir"; then code_sign_cleaned=$((code_sign_cleaned + 1)) fi - done < <(run_with_timeout 5 command find /private/var/folders -type d -name "*.code_sign_clone" -path "*/X/*" -print0 2> /dev/null || true) + done < <(run_with_timeout 5 command find /private/var/folders -maxdepth 5 -type d -name "*.code_sign_clone" -path "*/X/*" -print0 2> /dev/null || true) stop_section_spinner [[ $code_sign_cleaned -gt 0 ]] && log_success "Browser code signature caches, $code_sign_cleaned items" diff --git a/lib/clean/user.sh b/lib/clean/user.sh index 1eb8ae4..db21621 100644 --- a/lib/clean/user.sh +++ b/lib/clean/user.sh @@ -13,15 +13,15 @@ clean_user_essentials() { local trash_count_status=0 # Skip AppleScript during tests to avoid permission dialogs if [[ "${MOLE_TEST_MODE:-0}" == "1" || "${MOLE_TEST_NO_AUTH:-0}" == "1" ]]; then - trash_count=$(command find "$HOME/.Trash" -mindepth 1 -maxdepth 1 -exec printf '.' ';' 2> /dev/null | - wc -c | awk '{print $1}' || echo "0") + trash_count=$(command find "$HOME/.Trash" -mindepth 1 -maxdepth 1 -print0 2> /dev/null | + tr -dc '\0' | wc -c | tr -d ' ' || echo "0") else trash_count=$(run_with_timeout 3 osascript -e 'tell application "Finder" to count items in trash' 2> /dev/null) || trash_count_status=$? fi if [[ $trash_count_status -eq 124 ]]; then debug_log "Finder trash count timed out, using direct .Trash scan" - trash_count=$(command find "$HOME/.Trash" -mindepth 1 -maxdepth 1 -exec printf '.' ';' 2> /dev/null | - wc -c | awk '{print $1}' || echo "0") + trash_count=$(command find "$HOME/.Trash" -mindepth 1 -maxdepth 1 -print0 2> /dev/null | + tr -dc '\0' | wc -c | tr -d ' ' || echo "0") fi [[ "$trash_count" =~ ^[0-9]+$ ]] || trash_count="0" @@ -1093,24 +1093,13 @@ app_support_item_size_bytes() { return 1 fi - local du_tmp - du_tmp=$(mktemp) - local du_status=0 + local du_output # Use stricter timeout for directories - if run_with_timeout "$timeout_seconds" du -skP "$item" > "$du_tmp" 2> /dev/null; then - du_status=0 - else - du_status=$? - fi - - if [[ $du_status -ne 0 ]]; then - rm -f "$du_tmp" + if ! du_output=$(run_with_timeout "$timeout_seconds" du -skP "$item" 2> /dev/null); then return 1 fi - local size_kb - size_kb=$(awk 'NR==1 {print $1; exit}' "$du_tmp") - rm -f "$du_tmp" + local size_kb="${du_output%%[^0-9]*}" [[ "$size_kb" =~ ^[0-9]+$ ]] || return 1 printf '%s\n' "$((size_kb * 1024))" return 0 @@ -1155,12 +1144,9 @@ clean_application_support_logs() { last_progress_update=$(get_epoch_seconds) for app_dir in ~/Library/Application\ Support/*; do [[ -d "$app_dir" ]] || continue - local app_name - app_name=$(basename "$app_dir") + local app_name="${app_dir##*/}" app_count=$((app_count + 1)) update_progress_if_needed "$app_count" "$total_apps" last_progress_update 1 || true - local app_name_lower - app_name_lower=$(echo "$app_name" | LC_ALL=C tr '[:upper:]' '[:lower:]') local is_protected=false if is_path_whitelisted "$app_dir" 2> /dev/null; then is_protected=true @@ -1168,8 +1154,12 @@ clean_application_support_logs() { is_protected=true elif should_protect_data "$app_name"; then is_protected=true - elif should_protect_data "$app_name_lower"; then - is_protected=true + else + local app_name_lower + app_name_lower=$(echo "$app_name" | LC_ALL=C tr '[:upper:]' '[:lower:]') + if should_protect_data "$app_name_lower"; then + is_protected=true + fi fi if [[ "$is_protected" == "true" ]]; then continue