diff --git a/cmd/analyze/constants.go b/cmd/analyze/constants.go
index 7106087..7376daf 100644
--- a/cmd/analyze/constants.go
+++ b/cmd/analyze/constants.go
@@ -4,9 +4,9 @@ import "time"
 
 const (
 	maxEntries = 30
-	maxLargeFiles = 30
+	maxLargeFiles = 20
 	barWidth = 24
-	minLargeFileSize = 100 << 20
+	spotlightMinFileSize = 100 << 20
 	defaultViewport = 12
 	overviewCacheTTL = 7 * 24 * time.Hour
 	overviewCacheFile = "overview_sizes.json"
diff --git a/cmd/analyze/scanner.go b/cmd/analyze/scanner.go
index 2f0f797..74909da 100644
--- a/cmd/analyze/scanner.go
+++ b/cmd/analyze/scanner.go
@@ -37,6 +37,7 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in
 	largeFilesHeap := &largeFileHeap{}
 	heap.Init(largeFilesHeap)
+	largeFileMinSize := int64(0)
 
 	// Worker pool sized for I/O-bound scanning.
 	numWorkers := max(runtime.NumCPU()*cpuMultiplier, minWorkers)
@@ -84,9 +85,13 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in
 		for file := range largeFileChan {
 			if largeFilesHeap.Len() < maxLargeFiles {
 				heap.Push(largeFilesHeap, file)
+				if largeFilesHeap.Len() == maxLargeFiles {
+					atomic.StoreInt64(&largeFileMinSize, (*largeFilesHeap)[0].Size)
+				}
 			} else if file.Size > (*largeFilesHeap)[0].Size {
 				heap.Pop(largeFilesHeap)
 				heap.Push(largeFilesHeap, file)
+				atomic.StoreInt64(&largeFileMinSize, (*largeFilesHeap)[0].Size)
 			}
 		}
 	}()
@@ -148,7 +153,7 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in
 		} else if cached, err := loadCacheFromDisk(path); err == nil {
 			size = cached.TotalSize
 		} else {
-			size = calculateDirSizeConcurrent(path, largeFileChan, duSem, duQueueSem, filesScanned, dirsScanned, bytesScanned, currentPath)
+			size = calculateDirSizeConcurrent(path, largeFileChan, &largeFileMinSize, duSem, duQueueSem, filesScanned, dirsScanned, bytesScanned, currentPath)
 		}
 		atomic.AddInt64(&total, size)
 		atomic.AddInt64(dirsScanned, 1)
@@ -200,7 +205,7 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in
 			defer wg.Done()
 			defer func() { <-sem }()
 
-			size := calculateDirSizeConcurrent(path, largeFileChan, duSem, duQueueSem, filesScanned, dirsScanned, bytesScanned, currentPath)
+			size := calculateDirSizeConcurrent(path, largeFileChan, &largeFileMinSize, duSem, duQueueSem, filesScanned, dirsScanned, bytesScanned, currentPath)
 			atomic.AddInt64(&total, size)
 			atomic.AddInt64(dirsScanned, 1)
@@ -233,8 +238,11 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in
 			LastAccess: getLastAccessTimeFromInfo(info),
 		}
 
 		// Track large files only.
-		if !shouldSkipFileForLargeTracking(fullPath) && size >= minLargeFileSize {
-			largeFileChan <- fileEntry{Name: child.Name(), Path: fullPath, Size: size}
+		if !shouldSkipFileForLargeTracking(fullPath) {
+			minSize := atomic.LoadInt64(&largeFileMinSize)
+			if size >= minSize {
+				largeFileChan <- fileEntry{Name: child.Name(), Path: fullPath, Size: size}
+			}
 		}
 	}
@@ -256,8 +264,8 @@ func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *in
 		largeFiles[i] = heap.Pop(largeFilesHeap).(fileEntry)
 	}
 
-	// Use Spotlight for large files when available.
-	if spotlightFiles := findLargeFilesWithSpotlight(root, minLargeFileSize); len(spotlightFiles) > 0 {
+	// Use Spotlight for large files when it expands the list.
+	if spotlightFiles := findLargeFilesWithSpotlight(root, spotlightMinFileSize); len(spotlightFiles) > len(largeFiles) {
 		largeFiles = spotlightFiles
 	}
@@ -430,7 +438,7 @@ func isInFoldedDir(path string) bool {
 	return false
 }
 
-func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, duSem, duQueueSem chan struct{}, filesScanned, dirsScanned, bytesScanned *int64, currentPath *atomic.Value) int64 {
+func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, largeFileMinSize *int64, duSem, duQueueSem chan struct{}, filesScanned, dirsScanned, bytesScanned *int64, currentPath *atomic.Value) int64 {
 	children, err := os.ReadDir(root)
 	if err != nil {
 		return 0
@@ -488,7 +496,7 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, duS
 			defer wg.Done()
 			defer func() { <-sem }()
 
-			size := calculateDirSizeConcurrent(path, largeFileChan, duSem, duQueueSem, filesScanned, dirsScanned, bytesScanned, currentPath)
+			size := calculateDirSizeConcurrent(path, largeFileChan, largeFileMinSize, duSem, duQueueSem, filesScanned, dirsScanned, bytesScanned, currentPath)
 			atomic.AddInt64(&total, size)
 			atomic.AddInt64(dirsScanned, 1)
 		}(fullPath)
@@ -505,8 +513,11 @@ func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, duS
 		atomic.AddInt64(filesScanned, 1)
 		atomic.AddInt64(bytesScanned, size)
 
-		if !shouldSkipFileForLargeTracking(fullPath) && size >= minLargeFileSize {
-			largeFileChan <- fileEntry{Name: child.Name(), Path: fullPath, Size: size}
+		if !shouldSkipFileForLargeTracking(fullPath) && largeFileMinSize != nil {
+			minSize := atomic.LoadInt64(largeFileMinSize)
+			if size >= minSize {
+				largeFileChan <- fileEntry{Name: child.Name(), Path: fullPath, Size: size}
+			}
 		}
 
 		// Update current path occasionally to prevent UI jitter.
diff --git a/cmd/analyze/view.go b/cmd/analyze/view.go
index b92678b..726f1a9 100644
--- a/cmd/analyze/view.go
+++ b/cmd/analyze/view.go
@@ -112,7 +112,7 @@ func (m model) View() string {
 
 	if m.showLargeFiles {
 		if len(m.largeFiles) == 0 {
-			fmt.Fprintln(&b, " No large files found (>=100MB)")
+			fmt.Fprintln(&b, " No large files found")
 		} else {
 			viewport := calculateViewport(m.height, true)
 			start := max(m.largeOffset, 0)
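
Reviewer note: below is a minimal, self-contained sketch of the pattern this patch introduces, so the concurrency reasoning can be checked in isolation. A min-heap tracks the top-N files seen so far; once the heap fills, its minimum is published through an atomic "floor" that producers consult to skip files that could never qualify. The names topN, fileHeap, and the sample sizes are illustrative stand-ins, not the repository's actual code; fileEntry is trimmed to the two fields the pattern needs.

package main

import (
	"container/heap"
	"fmt"
	"sync"
	"sync/atomic"
)

type fileEntry struct {
	Name string
	Size int64
}

// fileHeap is a min-heap ordered by Size, so the smallest tracked file
// sits at index 0 and is the cheapest to evict.
type fileHeap []fileEntry

func (h fileHeap) Len() int           { return len(h) }
func (h fileHeap) Less(i, j int) bool { return h[i].Size < h[j].Size }
func (h fileHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
func (h *fileHeap) Push(x any)        { *h = append(*h, x.(fileEntry)) }
func (h *fileHeap) Pop() any {
	old := *h
	x := old[len(old)-1]
	*h = old[:len(old)-1]
	return x
}

const topN = 3 // stands in for maxLargeFiles

func main() {
	var minSize int64 // the atomic floor; stays 0 until the heap fills
	ch := make(chan fileEntry)
	h := &fileHeap{}
	heap.Init(h)

	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for f := range ch {
			if h.Len() < topN {
				heap.Push(h, f)
				if h.Len() == topN {
					// Heap just filled: publish the current minimum.
					atomic.StoreInt64(&minSize, (*h)[0].Size)
				}
			} else if f.Size > (*h)[0].Size {
				heap.Pop(h)
				heap.Push(h, f)
				// The minimum rose; republish the floor.
				atomic.StoreInt64(&minSize, (*h)[0].Size)
			}
		}
	}()

	// Producer side: only send files that beat the published floor.
	for i, size := range []int64{5, 40, 10, 70, 2, 90, 60} {
		if size >= atomic.LoadInt64(&minSize) {
			ch <- fileEntry{Name: fmt.Sprintf("file%d", i), Size: size}
		}
	}
	close(ch)
	wg.Wait()

	for h.Len() > 0 {
		fmt.Println(heap.Pop(h).(fileEntry)) // ascending by size
	}
}

The floor a producer reads may lag the heap slightly, so a file occasionally gets sent unnecessarily; that is harmless, because the consumer's heap comparison remains the authoritative filter. The floor only ever rises to the heap's current minimum, so any file below it could never have entered the heap anyway, and nothing that belongs in the top N is ever dropped.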