From cf4690191ed923ac03156915ef4c19571d40cdda Mon Sep 17 00:00:00 2001 From: Jack Phallen Date: Sun, 11 Jan 2026 10:39:27 -0500 Subject: [PATCH 1/2] Fix semaphore acquisition order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acquire the semaphore before spawning each goroutine in scanPathConcurrent, calculateDirSizeFast, and calculateDirSizeConcurrent to prevent goroutine explosion. Previously, all subdirectory goroutines were spawned immediately and blocked on the semaphore—now the spawning itself is throttled. The folded-dir du goroutine in calculateDirSizeConcurrent, which previously did not take the semaphore at all, now acquires and releases it as well. --- cmd/analyze/scanner.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cmd/analyze/scanner.go b/cmd/analyze/scanner.go index 0d7ddca..a417ee5 100644 --- a/cmd/analyze/scanner.go +++ b/cmd/analyze/scanner.go @@ -126,10 +126,10 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in // ~/Library is scanned separately; reuse cache when possible. if isHomeDir && child.Name() == "Library" { + sem <- struct{}{} wg.Add(1) go func(name, path string) { defer wg.Done() - sem <- struct{}{} defer func() { <-sem }() var size int64 @@ -156,10 +156,10 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in // Folded dirs: fast size without expanding. 
if shouldFoldDirWithPath(child.Name(), fullPath) { + sem <- struct{}{} wg.Add(1) go func(name, path string) { defer wg.Done() - sem <- struct{}{} defer func() { <-sem }() size, err := getDirectorySizeFromDu(path) @@ -180,10 +180,10 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in continue } + sem <- struct{}{} wg.Add(1) go func(name, path string) { defer wg.Done() - sem <- struct{}{} defer func() { <-sem }() size := calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath) @@ -311,11 +311,11 @@ func calculateDirSizeFast(root string, filesScanned, dirsScanned, bytesScanned * for _, entry := range entries { if entry.IsDir() { - wg.Add(1) subDir := filepath.Join(dirPath, entry.Name()) + sem <- struct{}{} + wg.Add(1) go func(p string) { defer wg.Done() - sem <- struct{}{} defer func() { <-sem }() walk(p) }(subDir) @@ -446,9 +446,11 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, fil if child.IsDir() { if shouldFoldDirWithPath(child.Name(), fullPath) { + sem <- struct{}{} wg.Add(1) go func(path string) { defer wg.Done() + defer func() { <-sem }() size, err := getDirectorySizeFromDu(path) if err == nil && size > 0 { atomic.AddInt64(&total, size) @@ -459,10 +461,10 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, fil continue } + sem <- struct{}{} wg.Add(1) go func(path string) { defer wg.Done() - sem <- struct{}{} defer func() { <-sem }() size := calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath) From b8ab9511fd0e45212996055bdb089b79585ce50e Mon Sep 17 00:00:00 2001 From: Jack Phallen Date: Sun, 11 Jan 2026 10:56:18 -0500 Subject: [PATCH 2/2] Create separate limiter for du subprocesses Introduce duSem to cap concurrent du subprocess spawns while keeping the existing sem to limit overall scan fan-out (dir recursion/task concurrency). 
This prevents du from monopolizing scan concurrency and reduces resource spikes during folded-dir sizing. --- cmd/analyze/scanner.go | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/cmd/analyze/scanner.go b/cmd/analyze/scanner.go index a417ee5..a1c3c30 100644 --- a/cmd/analyze/scanner.go +++ b/cmd/analyze/scanner.go @@ -50,6 +50,7 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in numWorkers = 1 } sem := make(chan struct{}, numWorkers) + duSem := make(chan struct{}, min(4, runtime.NumCPU())) var wg sync.WaitGroup // Collect results via channels. @@ -138,7 +139,7 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in } else if cached, err := loadCacheFromDisk(path); err == nil { size = cached.TotalSize } else { - size = calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath) + size = calculateDirSizeConcurrent(path, largeFileChan, duSem, filesScanned, dirsScanned, bytesScanned, currentPath) } atomic.AddInt64(&total, size) atomic.AddInt64(dirsScanned, 1) @@ -162,7 +163,11 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in defer wg.Done() defer func() { <-sem }() - size, err := getDirectorySizeFromDu(path) + size, err := func() (int64, error) { + duSem <- struct{}{} + defer func() { <-duSem }() + return getDirectorySizeFromDu(path) + }() if err != nil || size <= 0 { size = calculateDirSizeFast(path, filesScanned, dirsScanned, bytesScanned, currentPath) } @@ -186,7 +191,7 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in defer wg.Done() defer func() { <-sem }() - size := calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath) + size := calculateDirSizeConcurrent(path, largeFileChan, duSem, filesScanned, dirsScanned, bytesScanned, currentPath) atomic.AddInt64(&total, size) atomic.AddInt64(dirsScanned, 1) @@ -416,7 
+421,7 @@ func isInFoldedDir(path string) bool { return false } -func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, filesScanned, dirsScanned, bytesScanned *int64, currentPath *string) int64 { +func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, duSem chan struct{}, filesScanned, dirsScanned, bytesScanned *int64, currentPath *string) int64 { children, err := os.ReadDir(root) if err != nil { return 0 @@ -451,7 +456,11 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, fil go func(path string) { defer wg.Done() defer func() { <-sem }() - size, err := getDirectorySizeFromDu(path) + size, err := func() (int64, error) { + duSem <- struct{}{} + defer func() { <-duSem }() + return getDirectorySizeFromDu(path) + }() if err == nil && size > 0 { atomic.AddInt64(&total, size) atomic.AddInt64(bytesScanned, size) @@ -467,7 +476,7 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, fil defer wg.Done() defer func() { <-sem }() - size := calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath) + size := calculateDirSizeConcurrent(path, largeFileChan, duSem, filesScanned, dirsScanned, bytesScanned, currentPath) atomic.AddInt64(&total, size) atomic.AddInt64(dirsScanned, 1) }(fullPath)