diff --git a/bin/analyze-go b/bin/analyze-go index 1f046f8..bf51e42 100755 Binary files a/bin/analyze-go and b/bin/analyze-go differ diff --git a/cmd/analyze/analyze b/cmd/analyze/analyze new file mode 100755 index 0000000..46f1c47 Binary files /dev/null and b/cmd/analyze/analyze differ diff --git a/cmd/analyze/constants.go b/cmd/analyze/constants.go index 0ac6d11..438e3c8 100644 --- a/cmd/analyze/constants.go +++ b/cmd/analyze/constants.go @@ -16,6 +16,13 @@ const ( maxConcurrentOverview = 3 // Scan up to 3 overview dirs concurrently pathUpdateInterval = 500 // Update current path every N files batchUpdateSize = 100 // Batch atomic updates every N items + + // Worker pool configuration + minWorkers = 16 // Minimum workers for better I/O throughput + maxWorkers = 128 // Maximum workers to avoid excessive goroutines + cpuMultiplier = 4 // Worker multiplier per CPU core for I/O-bound operations + maxDirWorkers = 32 // Maximum concurrent subdirectory scans + openCommandTimeout = 10 * time.Second // Timeout for open/reveal commands ) var foldDirs = map[string]bool{ diff --git a/cmd/analyze/delete.go b/cmd/analyze/delete.go index 0aace7b..5a77122 100644 --- a/cmd/analyze/delete.go +++ b/cmd/analyze/delete.go @@ -22,9 +22,21 @@ func deletePathCmd(path string, counter *int64) tea.Cmd { func deletePathWithProgress(root string, counter *int64) (int64, error) { var count int64 + var firstErr error err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { if err != nil { + // Skip permission errors but continue walking + if os.IsPermission(err) { + if firstErr == nil { + firstErr = err + } + return filepath.SkipDir + } + // For other errors, record and continue + if firstErr == nil { + firstErr = err + } return nil } @@ -34,6 +46,9 @@ func deletePathWithProgress(root string, counter *int64) (int64, error) { if counter != nil { atomic.StoreInt64(counter, count) } + } else if firstErr == nil { + // Record first deletion error + firstErr = removeErr } } @@ -48,5 +63,6 @@ func deletePathWithProgress(root string, counter *int64) (int64, error) { return count, err } - return count, nil + // Return the first error encountered during deletion if any + return count, firstErr } diff --git a/cmd/analyze/main.go b/cmd/analyze/main.go index 63540e7..c4fb178 100644 --- a/cmd/analyze/main.go +++ b/cmd/analyze/main.go @@ -3,6 +3,7 @@ package main import ( + "context" "fmt" "io/fs" "os" @@ -448,6 +449,9 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { delete(m.overviewScanningSet, msg.path) if msg.err == nil { + if m.overviewSizeCache == nil { + m.overviewSizeCache = make(map[string]int64) + } m.overviewSizeCache[msg.path] = msg.size } @@ -630,16 +634,20 @@ func (m model) updateKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) { if m.showLargeFiles { if len(m.largeFiles) > 0 { selected := m.largeFiles[m.largeSelected] - go func() { - _ = exec.Command("open", selected.path).Run() - }() + go func(path string) { + ctx, cancel := context.WithTimeout(context.Background(), openCommandTimeout) + defer cancel() + _ = exec.CommandContext(ctx, "open", path).Run() + }(selected.path) m.status = fmt.Sprintf("Opening %s...", selected.name) } } else if len(m.entries) > 0 { selected := m.entries[m.selected] - go func() { - _ = exec.Command("open", selected.path).Run() - }() + go func(path string) { + ctx, cancel := context.WithTimeout(context.Background(), openCommandTimeout) + defer cancel() + _ = exec.CommandContext(ctx, "open", path).Run() + }(selected.path) m.status = fmt.Sprintf("Opening %s...", selected.name) } case "f", "F": @@ -648,14 +656,18 @@ func (m model) updateKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) { if len(m.largeFiles) > 0 { selected := m.largeFiles[m.largeSelected] go func(path string) { - _ = exec.Command("open", "-R", path).Run() + ctx, cancel := context.WithTimeout(context.Background(), openCommandTimeout) + defer cancel() + _ = exec.CommandContext(ctx, "open", "-R", path).Run() }(selected.path) m.status = fmt.Sprintf("Revealing %s in Finder...", selected.name) } } else if len(m.entries) > 0 { selected := m.entries[m.selected] go func(path string) { - _ = exec.Command("open", "-R", path).Run() + ctx, cancel := context.WithTimeout(context.Background(), openCommandTimeout) + defer cancel() + _ = exec.CommandContext(ctx, "open", "-R", path).Run() }(selected.path) m.status = fmt.Sprintf("Revealing %s in Finder...", selected.name) } @@ -934,12 +946,10 @@ func (m model) View() string { displayIndex := idx + 1 // Add unused time label if applicable - // For overview mode, get access time on-demand if not set and cache it + // For overview mode, get access time on-demand if not set lastAccess := entry.lastAccess if lastAccess.IsZero() && entry.path != "" { lastAccess = getLastAccessTime(entry.path) - // Cache the result to avoid repeated syscalls - m.entries[idx].lastAccess = lastAccess } unusedLabel := formatUnusedTime(lastAccess) if unusedLabel == "" { diff --git a/cmd/analyze/scanner.go b/cmd/analyze/scanner.go index 7159ef4..d645ea2 100644 --- a/cmd/analyze/scanner.go +++ b/cmd/analyze/scanner.go @@ -34,21 +34,20 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in // Use worker pool for concurrent directory scanning // For I/O-bound operations, use more workers than CPU count - maxWorkers := runtime.NumCPU() * 4 - if maxWorkers < 16 { - maxWorkers = 16 // Minimum 16 workers for better I/O throughput + numWorkers := runtime.NumCPU() * cpuMultiplier + if numWorkers < minWorkers { + numWorkers = minWorkers } - // Cap at 128 to avoid excessive goroutines - if maxWorkers > 128 { - maxWorkers = 128 + if numWorkers > maxWorkers { + numWorkers = maxWorkers } - if maxWorkers > len(children) { - maxWorkers = len(children) + if numWorkers > len(children) { + numWorkers = len(children) } - if maxWorkers < 1 { - maxWorkers = 1 + if numWorkers < 1 { + numWorkers = 1 } - sem := make(chan struct{}, maxWorkers) + sem := make(chan struct{}, numWorkers) var wg sync.WaitGroup // Use channels to collect results without lock contention @@ -91,8 +90,8 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in defer func() { <-sem }() // Try du command first for folded dirs (much faster) - size := calculateDirSizeWithDu(path) - if size <= 0 { + size, err := getDirectorySizeFromDu(path) + if err != nil || size <= 0 { // Fallback to walk if du fails size = calculateDirSizeFast(path, filesScanned, dirsScanned, bytesScanned, currentPath) } @@ -218,32 +217,6 @@ func shouldFoldDirWithPath(name, path string) bool { return false } -// calculateDirSizeWithDu uses du command for fast directory size calculation -// Returns size in bytes, or 0 if command fails -func calculateDirSizeWithDu(path string) int64 { - ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) - defer cancel() - - // Use -sk for 1K-block output, then convert to bytes - // macOS du doesn't support -b flag - cmd := exec.CommandContext(ctx, "du", "-sk", path) - output, err := cmd.Output() - if err != nil { - return 0 - } - - fields := strings.Fields(string(output)) - if len(fields) < 1 { - return 0 - } - - kb, err := strconv.ParseInt(fields[0], 10, 64) - if err != nil { - return 0 - } - - return kb * 1024 -} func shouldSkipFileForLargeTracking(path string) bool { ext := strings.ToLower(filepath.Ext(path)) @@ -323,7 +296,10 @@ func findLargeFilesWithSpotlight(root string, minSize int64) []fileEntry { // mdfind query: files >= minSize in the specified directory query := fmt.Sprintf("kMDItemFSSize >= %d", minSize) - cmd := exec.Command("mdfind", "-onlyin", root, query) + ctx, cancel := context.WithTimeout(context.Background(), mdlsTimeout) + defer cancel() + + cmd := exec.CommandContext(ctx, "mdfind", "-onlyin", root, query) output, err := cmd.Output() if err != nil { // Fallback: mdfind not available or failed @@ -405,8 +381,8 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, fil // Limit concurrent subdirectory scans to avoid too many goroutines maxConcurrent := runtime.NumCPU() * 2 - if maxConcurrent > 32 { - maxConcurrent = 32 + if maxConcurrent > maxDirWorkers { + maxConcurrent = maxDirWorkers } sem := make(chan struct{}, maxConcurrent) @@ -420,8 +396,8 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, fil wg.Add(1) go func(path string) { defer wg.Done() - size := calculateDirSizeWithDu(path) - if size > 0 { + size, err := getDirectorySizeFromDu(path) + if err == nil && size > 0 { atomic.AddInt64(&total, size) atomic.AddInt64(bytesScanned, size) atomic.AddInt64(dirsScanned, 1) @@ -507,45 +483,6 @@ func measureOverviewSize(path string) (int64, error) { return 0, fmt.Errorf("unable to measure directory size with fast methods") } -func getDirectorySizeFromMetadata(path string) (int64, error) { - info, err := os.Stat(path) - if err != nil { - return 0, fmt.Errorf("cannot stat path: %v", err) - } - if !info.IsDir() { - return 0, fmt.Errorf("not a directory") - } - - ctx, cancel := context.WithTimeout(context.Background(), mdlsTimeout) - defer cancel() - - cmd := exec.CommandContext(ctx, "mdls", "-raw", "-name", "kMDItemFSSize", path) - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - if ctx.Err() == context.DeadlineExceeded { - return 0, fmt.Errorf("mdls timeout after %v", mdlsTimeout) - } - if stderr.Len() > 0 { - return 0, fmt.Errorf("mdls failed: %v (%s)", err, stderr.String()) - } - return 0, fmt.Errorf("mdls failed: %v", err) - } - value := strings.TrimSpace(stdout.String()) - if value == "" || value == "(null)" { - return 0, fmt.Errorf("metadata size unavailable") - } - size, err := strconv.ParseInt(value, 10, 64) - if err != nil { - return 0, fmt.Errorf("failed to parse mdls output: %v", err) - } - if size <= 0 { - return 0, fmt.Errorf("mdls size invalid: %d", size) - } - return size, nil -} func getDirectorySizeFromDu(path string) (int64, error) { ctx, cancel := context.WithTimeout(context.Background(), duTimeout)