From b8ab9511fd0e45212996055bdb089b79585ce50e Mon Sep 17 00:00:00 2001 From: Jack Phallen Date: Sun, 11 Jan 2026 10:56:18 -0500 Subject: [PATCH] Create separate limiter for du subprocesses Introduce duSem to cap concurrent du subprocess spawns while keeping the existing sem to limit overall scan fan-out (dir recursion/task concurrency). This prevents du from monopolizing scan concurrency and reduces resource spikes during folded-dir sizing. --- cmd/analyze/scanner.go | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/cmd/analyze/scanner.go b/cmd/analyze/scanner.go index a417ee5..a1c3c30 100644 --- a/cmd/analyze/scanner.go +++ b/cmd/analyze/scanner.go @@ -50,6 +50,7 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in numWorkers = 1 } sem := make(chan struct{}, numWorkers) + duSem := make(chan struct{}, min(4, runtime.NumCPU())) var wg sync.WaitGroup // Collect results via channels. @@ -138,7 +139,7 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in } else if cached, err := loadCacheFromDisk(path); err == nil { size = cached.TotalSize } else { - size = calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath) + size = calculateDirSizeConcurrent(path, largeFileChan, duSem, filesScanned, dirsScanned, bytesScanned, currentPath) } atomic.AddInt64(&total, size) atomic.AddInt64(dirsScanned, 1) @@ -162,7 +163,11 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in defer wg.Done() defer func() { <-sem }() - size, err := getDirectorySizeFromDu(path) + size, err := func() (int64, error) { + duSem <- struct{}{} + defer func() { <-duSem }() + return getDirectorySizeFromDu(path) + }() if err != nil || size <= 0 { size = calculateDirSizeFast(path, filesScanned, dirsScanned, bytesScanned, currentPath) } @@ -186,7 +191,7 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in defer wg.Done() defer func() { <-sem }() - size := calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath) + size := calculateDirSizeConcurrent(path, largeFileChan, duSem, filesScanned, dirsScanned, bytesScanned, currentPath) atomic.AddInt64(&total, size) atomic.AddInt64(dirsScanned, 1) @@ -416,7 +421,7 @@ func isInFoldedDir(path string) bool { return false } -func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, filesScanned, dirsScanned, bytesScanned *int64, currentPath *string) int64 { +func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, duSem chan struct{}, filesScanned, dirsScanned, bytesScanned *int64, currentPath *string) int64 { children, err := os.ReadDir(root) if err != nil { return 0 @@ -451,7 +456,11 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, fil go func(path string) { defer wg.Done() defer func() { <-sem }() - size, err := getDirectorySizeFromDu(path) + size, err := func() (int64, error) { + duSem <- struct{}{} + defer func() { <-duSem }() + return getDirectorySizeFromDu(path) + }() if err == nil && size > 0 { atomic.AddInt64(&total, size) atomic.AddInt64(bytesScanned, size) @@ -467,7 +476,7 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, fil defer wg.Done() defer func() { <-sem }() - size := calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath) + size := calculateDirSizeConcurrent(path, largeFileChan, duSem, filesScanned, dirsScanned, bytesScanned, currentPath) atomic.AddInt64(&total, size) atomic.AddInt64(dirsScanned, 1) }(fullPath)