mirror of https://github.com/tw93/Mole.git synced 2026-02-04 11:31:46 +00:00

Refactor Go analysis capabilities

Tw93
2025-11-18 18:23:33 +08:00
parent dc3edfa625
commit a900d03bef
8 changed files with 1425 additions and 1418 deletions

Binary file not shown.

cmd/analyze/cache.go (new file, 260 lines added)

@@ -0,0 +1,260 @@
package main
import (
"encoding/gob"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"time"
"github.com/cespare/xxhash/v2"
)
type overviewSizeSnapshot struct {
Size int64 `json:"size"`
Updated time.Time `json:"updated"`
}
var (
overviewSnapshotMu sync.Mutex
overviewSnapshotCache map[string]overviewSizeSnapshot
overviewSnapshotLoaded bool
)
func snapshotFromModel(m model) historyEntry {
return historyEntry{
path: m.path,
entries: cloneDirEntries(m.entries),
largeFiles: cloneFileEntries(m.largeFiles),
totalSize: m.totalSize,
selected: m.selected,
entryOffset: m.offset,
largeSelected: m.largeSelected,
largeOffset: m.largeOffset,
}
}
func cacheSnapshot(m model) historyEntry {
entry := snapshotFromModel(m)
entry.dirty = false
return entry
}
func cloneDirEntries(entries []dirEntry) []dirEntry {
if len(entries) == 0 {
return nil
}
copied := make([]dirEntry, len(entries))
copy(copied, entries)
return copied
}
func cloneFileEntries(files []fileEntry) []fileEntry {
if len(files) == 0 {
return nil
}
copied := make([]fileEntry, len(files))
copy(copied, files)
return copied
}
func ensureOverviewSnapshotCacheLocked() error {
if overviewSnapshotLoaded {
return nil
}
storePath, err := getOverviewSizeStorePath()
if err != nil {
return err
}
data, err := os.ReadFile(storePath)
if err != nil {
if os.IsNotExist(err) {
overviewSnapshotCache = make(map[string]overviewSizeSnapshot)
overviewSnapshotLoaded = true
return nil
}
return err
}
if len(data) == 0 {
overviewSnapshotCache = make(map[string]overviewSizeSnapshot)
overviewSnapshotLoaded = true
return nil
}
var snapshots map[string]overviewSizeSnapshot
if err := json.Unmarshal(data, &snapshots); err != nil || snapshots == nil {
backupPath := storePath + ".corrupt"
_ = os.Rename(storePath, backupPath)
overviewSnapshotCache = make(map[string]overviewSizeSnapshot)
overviewSnapshotLoaded = true
return nil
}
overviewSnapshotCache = snapshots
overviewSnapshotLoaded = true
return nil
}
func getOverviewSizeStorePath() (string, error) {
cacheDir, err := getCacheDir()
if err != nil {
return "", err
}
return filepath.Join(cacheDir, overviewCacheFile), nil
}
func loadStoredOverviewSize(path string) (int64, error) {
if path == "" {
return 0, fmt.Errorf("empty path")
}
overviewSnapshotMu.Lock()
defer overviewSnapshotMu.Unlock()
if err := ensureOverviewSnapshotCacheLocked(); err != nil {
return 0, err
}
if overviewSnapshotCache == nil {
return 0, fmt.Errorf("snapshot cache unavailable")
}
if snapshot, ok := overviewSnapshotCache[path]; ok && snapshot.Size > 0 {
if time.Since(snapshot.Updated) < overviewCacheTTL {
return snapshot.Size, nil
}
return 0, fmt.Errorf("snapshot expired")
}
return 0, fmt.Errorf("snapshot not found")
}
func storeOverviewSize(path string, size int64) error {
if path == "" || size <= 0 {
return fmt.Errorf("invalid overview size")
}
overviewSnapshotMu.Lock()
defer overviewSnapshotMu.Unlock()
if err := ensureOverviewSnapshotCacheLocked(); err != nil {
return err
}
if overviewSnapshotCache == nil {
overviewSnapshotCache = make(map[string]overviewSizeSnapshot)
}
overviewSnapshotCache[path] = overviewSizeSnapshot{
Size: size,
Updated: time.Now(),
}
return persistOverviewSnapshotLocked()
}
func persistOverviewSnapshotLocked() error {
storePath, err := getOverviewSizeStorePath()
if err != nil {
return err
}
tmpPath := storePath + ".tmp"
data, err := json.MarshalIndent(overviewSnapshotCache, "", " ")
if err != nil {
return err
}
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
return err
}
return os.Rename(tmpPath, storePath)
}
func loadOverviewCachedSize(path string) (int64, error) {
if path == "" {
return 0, fmt.Errorf("empty path")
}
if snapshot, err := loadStoredOverviewSize(path); err == nil {
return snapshot, nil
}
cacheEntry, err := loadCacheFromDisk(path)
if err != nil {
return 0, err
}
_ = storeOverviewSize(path, cacheEntry.TotalSize)
return cacheEntry.TotalSize, nil
}
func getCacheDir() (string, error) {
home, err := os.UserHomeDir()
if err != nil {
return "", err
}
cacheDir := filepath.Join(home, ".cache", "mole")
if err := os.MkdirAll(cacheDir, 0755); err != nil {
return "", err
}
return cacheDir, nil
}
func getCachePath(path string) (string, error) {
cacheDir, err := getCacheDir()
if err != nil {
return "", err
}
hash := xxhash.Sum64String(path)
filename := fmt.Sprintf("%x.cache", hash)
return filepath.Join(cacheDir, filename), nil
}
func loadCacheFromDisk(path string) (*cacheEntry, error) {
cachePath, err := getCachePath(path)
if err != nil {
return nil, err
}
file, err := os.Open(cachePath)
if err != nil {
return nil, err
}
defer file.Close()
var entry cacheEntry
decoder := gob.NewDecoder(file)
if err := decoder.Decode(&entry); err != nil {
return nil, err
}
info, err := os.Stat(path)
if err != nil {
return nil, err
}
if info.ModTime().After(entry.ModTime) {
return nil, fmt.Errorf("cache expired: directory modified")
}
if time.Since(entry.ScanTime) > 7*24*time.Hour {
return nil, fmt.Errorf("cache expired: too old")
}
return &entry, nil
}
func saveCacheToDisk(path string, result scanResult) error {
cachePath, err := getCachePath(path)
if err != nil {
return err
}
info, err := os.Stat(path)
if err != nil {
return err
}
entry := cacheEntry{
Entries: result.entries,
LargeFiles: result.largeFiles,
TotalSize: result.totalSize,
ModTime: info.ModTime(),
ScanTime: time.Now(),
}
file, err := os.Create(cachePath)
if err != nil {
return err
}
defer file.Close()
encoder := gob.NewEncoder(file)
return encoder.Encode(entry)
}
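
Usage sketch (hedged, not part of the commit): the snippet below shows how these cache helpers are meant to compose, assuming the scanResult and cacheEntry types from the suppressed main.go diff carry the fields used above (entries, largeFiles, totalSize).

func refreshOverviewCache(path string, result scanResult) (int64, error) {
	// Persist the full scan as a gob cache so loadCacheFromDisk can serve it later.
	if err := saveCacheToDisk(path, result); err != nil {
		return 0, err
	}
	// Record the lightweight JSON size snapshot used by the overview screen.
	_ = storeOverviewSize(path, result.totalSize)
	// Later reads try the JSON snapshot first and fall back to the gob cache.
	return loadOverviewCachedSize(path)
}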

cmd/analyze/constants.go (new file, 232 lines added)

@@ -0,0 +1,232 @@
package main
import "time"
const (
maxEntries = 30
maxLargeFiles = 30
barWidth = 24
minLargeFileSize = 100 << 20 // 100 MB
entryViewport = 10
largeViewport = 10
overviewCacheTTL = 7 * 24 * time.Hour // 7 days
overviewCacheFile = "overview_sizes.json"
duTimeout = 60 * time.Second // Increased for large directories
mdlsTimeout = 5 * time.Second
maxConcurrentOverview = 3 // Scan up to 3 overview dirs concurrently
pathUpdateInterval = 500 // Update current path every N files
batchUpdateSize = 100 // Batch atomic updates every N items
)
var foldDirs = map[string]bool{
// Version control
".git": true,
".svn": true,
".hg": true,
// JavaScript/Node
"node_modules": true,
".npm": true,
"_npx": true, // ~/.npm/_npx global cache
"_cacache": true, // ~/.npm/_cacache
"_logs": true,
"_locks": true,
"_quick": true,
"_libvips": true,
"_prebuilds": true,
"_update-notifier-last-checked": true,
".yarn": true,
".pnpm-store": true,
".next": true,
".nuxt": true,
"bower_components": true,
".vite": true,
".turbo": true,
".parcel-cache": true,
".nx": true,
".rush": true,
"tnpm": true,
".tnpm": true,
".bun": true,
".deno": true,
// Python
"__pycache__": true,
".pytest_cache": true,
".mypy_cache": true,
".ruff_cache": true,
"venv": true,
".venv": true,
"virtualenv": true,
".tox": true,
"site-packages": true,
".eggs": true,
"*.egg-info": true,
".pyenv": true,
".poetry": true,
".pip": true,
".pipx": true,
// Ruby/Go/PHP (vendor), Java/Kotlin/Scala/Rust (target)
"vendor": true,
".bundle": true,
"gems": true,
".rbenv": true,
"target": true,
".gradle": true,
".m2": true,
".ivy2": true,
"out": true,
"pkg": true,
"composer.phar": true,
".composer": true,
".cargo": true,
// Build outputs
"build": true,
"dist": true,
".output": true,
"coverage": true,
".coverage": true,
// IDE
".idea": true,
".vscode": true,
".vs": true,
".fleet": true,
// Cache directories
".cache": true,
"__MACOSX": true,
".DS_Store": true,
".Trash": true,
"Caches": true,
".Spotlight-V100": true,
".fseventsd": true,
".DocumentRevisions-V100": true,
".TemporaryItems": true,
"$RECYCLE.BIN": true,
".temp": true,
".tmp": true,
"_temp": true,
"_tmp": true,
".Homebrew": true,
".rustup": true,
".sdkman": true,
".nvm": true,
// macOS specific
"Application Scripts": true,
"Saved Application State": true,
// iCloud
"Mobile Documents": true,
// Docker & Containers
".docker": true,
".containerd": true,
// Mobile development
"Pods": true,
"DerivedData": true,
".build": true,
"xcuserdata": true,
"Carthage": true,
// Web frameworks
".angular": true,
".svelte-kit": true,
".astro": true,
".solid": true,
// Databases
".mysql": true,
".postgres": true,
"mongodb": true,
// Other
".terraform": true,
".vagrant": true,
"tmp": true,
"temp": true,
}
var skipSystemDirs = map[string]bool{
"dev": true,
"tmp": true,
"private": true,
"cores": true,
"net": true,
"home": true,
"System": true,
"sbin": true,
"bin": true,
"etc": true,
"var": true,
".vol": true,
".Spotlight-V100": true,
".fseventsd": true,
".DocumentRevisions-V100": true,
".TemporaryItems": true,
}
var skipExtensions = map[string]bool{
".go": true,
".js": true,
".ts": true,
".tsx": true,
".jsx": true,
".json": true,
".md": true,
".txt": true,
".yml": true,
".yaml": true,
".xml": true,
".html": true,
".css": true,
".scss": true,
".sass": true,
".less": true,
".py": true,
".rb": true,
".java": true,
".kt": true,
".rs": true,
".swift": true,
".m": true,
".mm": true,
".c": true,
".cpp": true,
".h": true,
".hpp": true,
".cs": true,
".sql": true,
".db": true,
".lock": true,
".gradle": true,
".mjs": true,
".cjs": true,
".coffee": true,
".dart": true,
".svelte": true,
".vue": true,
".nim": true,
".hx": true,
}
var spinnerFrames = []string{"|", "/", "-", "\\", "|", "/", "-", "\\"}
const (
colorPurple = "\033[0;35m"
colorBlue = "\033[0;34m"
colorGray = "\033[0;90m"
colorRed = "\033[0;31m"
colorYellow = "\033[1;33m"
colorGreen = "\033[0;32m"
colorCyan = "\033[0;36m"
colorReset = "\033[0m"
colorBold = "\033[1m"
colorBgCyan = "\033[46m"
colorBgDark = "\033[100m"
colorInvert = "\033[7m"
)
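
Hedged illustration of how these lookup tables gate a path (the real checks live in scanner.go); the classify helper below is illustrative only.

func classify(name, ext string) string {
	switch {
	case skipSystemDirs[name]:
		return "skipped when scanning the filesystem root"
	case foldDirs[name]:
		return "folded: sized with du, not expanded"
	case skipExtensions[ext]: // ext is the lowercased filepath.Ext of a file
		return "excluded from large-file tracking"
	default:
		return "scanned normally"
	}
}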

cmd/analyze/delete.go (new file, 52 lines added)

@@ -0,0 +1,52 @@
package main
import (
"io/fs"
"os"
"path/filepath"
"sync/atomic"
tea "github.com/charmbracelet/bubbletea"
)
func deletePathCmd(path string, counter *int64) tea.Cmd {
return func() tea.Msg {
count, err := deletePathWithProgress(path, counter)
return deleteProgressMsg{
done: true,
err: err,
count: count,
}
}
}
func deletePathWithProgress(root string, counter *int64) (int64, error) {
var count int64
err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return nil
}
if !d.IsDir() {
if removeErr := os.Remove(path); removeErr == nil {
count++
if counter != nil {
atomic.StoreInt64(counter, count)
}
}
}
return nil
})
if err != nil {
return count, err
}
if err := os.RemoveAll(root); err != nil {
return count, err
}
return count, nil
}
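
A hedged sketch of how deletePathCmd would be wired into the Bubble Tea update loop; deleteProgressMsg and the model fields live in the suppressed main.go diff, so the names used here (deleteCounter, status) are assumptions.

// Illustrative only: assumes model carries a *int64 deleteCounter and a status string.
func startDelete(m model, target string) (model, tea.Cmd) {
	m.status = "Deleting " + displayPath(target)
	// deletePathWithProgress updates the counter atomically, so a tick
	// command can read it with atomic.LoadInt64 to show live progress.
	return m, deletePathCmd(target, m.deleteCounter)
}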

cmd/analyze/format.go (new file, 245 lines added)

@@ -0,0 +1,245 @@
package main
import (
"fmt"
"os"
"strings"
"time"
)
func displayPath(path string) string {
home, err := os.UserHomeDir()
if err != nil || home == "" {
return path
}
if strings.HasPrefix(path, home) {
return strings.Replace(path, home, "~", 1)
}
return path
}
// truncateMiddle truncates string in the middle, keeping head and tail.
func truncateMiddle(s string, maxWidth int) string {
runes := []rune(s)
currentWidth := displayWidth(s)
if currentWidth <= maxWidth {
return s
}
// Reserve 3 width for "..."
if maxWidth < 10 {
// Simple truncation for very small width
width := 0
for i, r := range runes {
width += runeWidth(r)
if width > maxWidth {
return string(runes[:i])
}
}
return s
}
// Keep more of the tail (filename usually more important)
targetHeadWidth := (maxWidth - 3) / 3
targetTailWidth := maxWidth - 3 - targetHeadWidth
// Find head cutoff point based on display width
headWidth := 0
headIdx := 0
for i, r := range runes {
w := runeWidth(r)
if headWidth+w > targetHeadWidth {
break
}
headWidth += w
headIdx = i + 1
}
// Find tail cutoff point
tailWidth := 0
tailIdx := len(runes)
for i := len(runes) - 1; i >= 0; i-- {
w := runeWidth(runes[i])
if tailWidth+w > targetTailWidth {
break
}
tailWidth += w
tailIdx = i
}
return string(runes[:headIdx]) + "..." + string(runes[tailIdx:])
}
func formatNumber(n int64) string {
if n < 1000 {
return fmt.Sprintf("%d", n)
}
if n < 1000000 {
return fmt.Sprintf("%.1fk", float64(n)/1000)
}
return fmt.Sprintf("%.1fM", float64(n)/1000000)
}
func humanizeBytes(size int64) string {
if size < 0 {
return "0 B"
}
const unit = 1024
if size < unit {
return fmt.Sprintf("%d B", size)
}
div, exp := int64(unit), 0
for n := size / unit; n >= unit; n /= unit {
div *= unit
exp++
}
value := float64(size) / float64(div)
return fmt.Sprintf("%.1f %cB", value, "KMGTPE"[exp])
}
func progressBar(value, max int64) string {
if max <= 0 {
return strings.Repeat("░", barWidth)
}
filled := int((value * int64(barWidth)) / max)
if filled > barWidth {
filled = barWidth
}
bar := strings.Repeat("█", filled)
if filled < barWidth {
bar += strings.Repeat("░", barWidth-filled)
}
return bar
}
func coloredProgressBar(value, max int64, percent float64) string {
if max <= 0 {
return colorGray + strings.Repeat("░", barWidth) + colorReset
}
filled := int((value * int64(barWidth)) / max)
if filled > barWidth {
filled = barWidth
}
// Choose color based on percentage
var barColor string
if percent >= 50 {
barColor = colorRed
} else if percent >= 20 {
barColor = colorYellow
} else if percent >= 5 {
barColor = colorCyan
} else {
barColor = colorGreen
}
bar := barColor
for i := 0; i < barWidth; i++ {
if i < filled {
if i < filled-1 {
bar += "█"
} else {
remainder := (value * int64(barWidth)) % max
if remainder > max/2 {
bar += "█"
} else if remainder > max/4 {
bar += "▓"
} else {
bar += "▒"
}
}
} else {
bar += colorGray + "░" + barColor
}
}
return bar + colorReset
}
// Calculate display width considering CJK characters.
func runeWidth(r rune) int {
if r >= 0x4E00 && r <= 0x9FFF ||
r >= 0x3400 && r <= 0x4DBF ||
r >= 0xAC00 && r <= 0xD7AF ||
r >= 0xFF00 && r <= 0xFFEF {
return 2
}
return 1
}
func displayWidth(s string) int {
width := 0
for _, r := range s {
width += runeWidth(r)
}
return width
}
func trimName(name string) string {
const (
maxWidth = 28
ellipsis = "..."
ellipsisWidth = 3
)
runes := []rune(name)
widths := make([]int, len(runes))
for i, r := range runes {
widths[i] = runeWidth(r)
}
currentWidth := 0
for i, w := range widths {
if currentWidth+w > maxWidth {
subWidth := currentWidth
j := i
for j > 0 && subWidth+ellipsisWidth > maxWidth {
j--
subWidth -= widths[j]
}
if j == 0 {
return ellipsis
}
return string(runes[:j]) + ellipsis
}
currentWidth += w
}
return name
}
func padName(name string, targetWidth int) string {
currentWidth := displayWidth(name)
if currentWidth >= targetWidth {
return name
}
return name + strings.Repeat(" ", targetWidth-currentWidth)
}
// formatUnusedTime formats the time since last access in a compact way.
func formatUnusedTime(lastAccess time.Time) string {
if lastAccess.IsZero() {
return ""
}
duration := time.Since(lastAccess)
days := int(duration.Hours() / 24)
if days < 90 {
return ""
}
months := days / 30
years := days / 365
if years >= 2 {
return fmt.Sprintf(">%dyr", years)
} else if years >= 1 {
return ">1yr"
} else if months >= 3 {
return fmt.Sprintf(">%dmo", months)
}
return ""
}
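
A few hedged examples of what these helpers produce (values chosen for illustration; the expected outputs in the comments follow from the code above).

func formatExamples() {
	fmt.Println(humanizeBytes(1536))                             // 1.5 KB
	fmt.Println(formatNumber(12500))                             // 12.5k
	fmt.Println(progressBar(6, 24))                              // 6 of 24 cells filled
	fmt.Println(formatUnusedTime(time.Now().AddDate(-1, -2, 0))) // >1yr
}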

File diff suppressed because it is too large

cmd/analyze/scanner.go (new file, 634 lines added)

@@ -0,0 +1,634 @@
package main
import (
"bytes"
"context"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"golang.org/x/sync/singleflight"
)
var scanGroup singleflight.Group
func scanPathConcurrent(root string, filesScanned, dirsScanned, bytesScanned *int64, currentPath *string) (scanResult, error) {
children, err := os.ReadDir(root)
if err != nil {
return scanResult{}, err
}
var total int64
entries := make([]dirEntry, 0, len(children))
largeFiles := make([]fileEntry, 0, maxLargeFiles*2)
// Use worker pool for concurrent directory scanning
// For I/O-bound operations, use more workers than CPU count
maxWorkers := runtime.NumCPU() * 4
if maxWorkers < 16 {
maxWorkers = 16 // Minimum 16 workers for better I/O throughput
}
// Cap at 128 to avoid excessive goroutines
if maxWorkers > 128 {
maxWorkers = 128
}
if maxWorkers > len(children) {
maxWorkers = len(children)
}
if maxWorkers < 1 {
maxWorkers = 1
}
sem := make(chan struct{}, maxWorkers)
var wg sync.WaitGroup
// Use channels to collect results without lock contention
entryChan := make(chan dirEntry, len(children))
largeFileChan := make(chan fileEntry, maxLargeFiles*2)
// Start goroutines to collect from channels
var collectorWg sync.WaitGroup
collectorWg.Add(2)
go func() {
defer collectorWg.Done()
for entry := range entryChan {
entries = append(entries, entry)
}
}()
go func() {
defer collectorWg.Done()
for file := range largeFileChan {
largeFiles = append(largeFiles, file)
}
}()
isRootDir := root == "/"
for _, child := range children {
fullPath := filepath.Join(root, child.Name())
if child.IsDir() {
// In root directory, skip system directories completely
if isRootDir && skipSystemDirs[child.Name()] {
continue
}
// For folded directories, calculate size quickly without expanding
if shouldFoldDirWithPath(child.Name(), fullPath) {
wg.Add(1)
go func(name, path string) {
defer wg.Done()
sem <- struct{}{}
defer func() { <-sem }()
// Try du command first for folded dirs (much faster)
size := calculateDirSizeWithDu(path)
if size <= 0 {
// Fallback to walk if du fails
size = calculateDirSizeFast(path, filesScanned, dirsScanned, bytesScanned, currentPath)
}
atomic.AddInt64(&total, size)
atomic.AddInt64(dirsScanned, 1)
entryChan <- dirEntry{
name: name,
path: path,
size: size,
isDir: true,
lastAccess: time.Time{}, // Lazy load when displayed
}
}(child.Name(), fullPath)
continue
}
// Normal directory: full scan with detail
wg.Add(1)
go func(name, path string) {
defer wg.Done()
sem <- struct{}{}
defer func() { <-sem }()
size := calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath)
atomic.AddInt64(&total, size)
atomic.AddInt64(dirsScanned, 1)
entryChan <- dirEntry{
name: name,
path: path,
size: size,
isDir: true,
lastAccess: time.Time{}, // Lazy load when displayed
}
}(child.Name(), fullPath)
continue
}
info, err := child.Info()
if err != nil {
continue
}
// Get actual disk usage for sparse files and cloud files
size := getActualFileSize(fullPath, info)
atomic.AddInt64(&total, size)
atomic.AddInt64(filesScanned, 1)
atomic.AddInt64(bytesScanned, size)
entryChan <- dirEntry{
name: child.Name(),
path: fullPath,
size: size,
isDir: false,
lastAccess: getLastAccessTimeFromInfo(info),
}
// Only track large files that are not code/text files
if !shouldSkipFileForLargeTracking(fullPath) && size >= minLargeFileSize {
largeFileChan <- fileEntry{name: child.Name(), path: fullPath, size: size}
}
}
wg.Wait()
// Close channels and wait for collectors to finish
close(entryChan)
close(largeFileChan)
collectorWg.Wait()
sort.Slice(entries, func(i, j int) bool {
return entries[i].size > entries[j].size
})
if len(entries) > maxEntries {
entries = entries[:maxEntries]
}
// Try to use Spotlight for faster large file discovery
if spotlightFiles := findLargeFilesWithSpotlight(root, minLargeFileSize); len(spotlightFiles) > 0 {
largeFiles = spotlightFiles
} else {
// Sort and trim large files collected from scanning
sort.Slice(largeFiles, func(i, j int) bool {
return largeFiles[i].size > largeFiles[j].size
})
if len(largeFiles) > maxLargeFiles {
largeFiles = largeFiles[:maxLargeFiles]
}
}
return scanResult{
entries: entries,
largeFiles: largeFiles,
totalSize: total,
}, nil
}
func shouldFoldDir(name string) bool {
return foldDirs[name]
}
// shouldFoldDirWithPath checks if a directory should be folded based on path context
func shouldFoldDirWithPath(name, path string) bool {
// Check basic fold list first
if foldDirs[name] {
return true
}
// Special case: npm cache directories - fold all subdirectories
// This includes: .npm/_quick/*, .npm/_cacache/*, .npm/a-z/*, .tnpm/*
if strings.Contains(path, "/.npm/") || strings.Contains(path, "/.tnpm/") {
// Get the parent directory name
parent := filepath.Base(filepath.Dir(path))
// If parent is a cache folder (_quick, _cacache, etc) or npm dir itself, fold it
if parent == ".npm" || parent == ".tnpm" || strings.HasPrefix(parent, "_") {
return true
}
// Also fold single-letter subdirectories (npm cache structure like .npm/a/, .npm/b/)
if len(name) == 1 {
return true
}
}
return false
}
// calculateDirSizeWithDu uses du command for fast directory size calculation
// Returns size in bytes, or 0 if command fails
func calculateDirSizeWithDu(path string) int64 {
ctx, cancel := context.WithTimeout(context.Background(), duTimeout)
defer cancel()
// Use -sk for 1K-block output, then convert to bytes
// macOS du doesn't support -b flag
cmd := exec.CommandContext(ctx, "du", "-sk", path)
output, err := cmd.Output()
if err != nil {
return 0
}
fields := strings.Fields(string(output))
if len(fields) < 1 {
return 0
}
kb, err := strconv.ParseInt(fields[0], 10, 64)
if err != nil {
return 0
}
return kb * 1024
}
func shouldSkipFileForLargeTracking(path string) bool {
ext := strings.ToLower(filepath.Ext(path))
return skipExtensions[ext]
}
// calculateDirSizeFast performs fast directory size calculation without detailed tracking or large file detection.
// Updates progress counters in batches to reduce atomic operation overhead.
func calculateDirSizeFast(root string, filesScanned, dirsScanned, bytesScanned *int64, currentPath *string) int64 {
var total int64
var localFiles, localDirs int64
var batchBytes int64
// Create context with timeout
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
walkFunc := func(path string, d fs.DirEntry, err error) error {
// Check for timeout
select {
case <-ctx.Done():
return ctx.Err()
default:
}
if err != nil {
return nil
}
if d.IsDir() {
localDirs++
// Batch update every N dirs to reduce atomic operations
if localDirs%batchUpdateSize == 0 {
atomic.AddInt64(dirsScanned, batchUpdateSize)
localDirs = 0
}
return nil
}
info, err := d.Info()
if err != nil {
return nil
}
// Get actual disk usage for sparse files and cloud files
size := getActualFileSize(path, info)
total += size
batchBytes += size
localFiles++
if currentPath != nil {
*currentPath = path
}
// Batch update every N files to reduce atomic operations
if localFiles%batchUpdateSize == 0 {
atomic.AddInt64(filesScanned, batchUpdateSize)
atomic.AddInt64(bytesScanned, batchBytes)
localFiles = 0
batchBytes = 0
}
return nil
}
_ = filepath.WalkDir(root, walkFunc)
// Final update for remaining counts
if localFiles > 0 {
atomic.AddInt64(filesScanned, localFiles)
}
if localDirs > 0 {
atomic.AddInt64(dirsScanned, localDirs)
}
if batchBytes > 0 {
atomic.AddInt64(bytesScanned, batchBytes)
}
return total
}
// Use Spotlight (mdfind) to quickly find large files in a directory
func findLargeFilesWithSpotlight(root string, minSize int64) []fileEntry {
// mdfind query: files >= minSize in the specified directory
query := fmt.Sprintf("kMDItemFSSize >= %d", minSize)
cmd := exec.Command("mdfind", "-onlyin", root, query)
output, err := cmd.Output()
if err != nil {
// Fallback: mdfind not available or failed
return nil
}
lines := strings.Split(strings.TrimSpace(string(output)), "\n")
var files []fileEntry
for _, line := range lines {
if line == "" {
continue
}
// Filter out code files first (cheapest check, no I/O)
if shouldSkipFileForLargeTracking(line) {
continue
}
// Filter out files in folded directories (cheap string check)
if isInFoldedDir(line) {
continue
}
// Use Lstat instead of Stat (faster, doesn't follow symlinks)
info, err := os.Lstat(line)
if err != nil {
continue
}
// Skip if it's a directory or symlink
if info.IsDir() || info.Mode()&os.ModeSymlink != 0 {
continue
}
// Get actual disk usage for sparse files and cloud files
actualSize := getActualFileSize(line, info)
files = append(files, fileEntry{
name: filepath.Base(line),
path: line,
size: actualSize,
})
}
// Sort by size (descending)
sort.Slice(files, func(i, j int) bool {
return files[i].size > files[j].size
})
// Return top N
if len(files) > maxLargeFiles {
files = files[:maxLargeFiles]
}
return files
}
// isInFoldedDir checks if a path is inside a folded directory (optimized)
func isInFoldedDir(path string) bool {
// Split path into components for faster checking
parts := strings.Split(path, string(os.PathSeparator))
for _, part := range parts {
if foldDirs[part] {
return true
}
}
return false
}
func calculateDirSizeConcurrent(root string, largeFileChan chan<- fileEntry, filesScanned, dirsScanned, bytesScanned *int64, currentPath *string) int64 {
// Read immediate children
children, err := os.ReadDir(root)
if err != nil {
return 0
}
var total int64
var wg sync.WaitGroup
// Limit concurrent subdirectory scans to avoid too many goroutines
maxConcurrent := runtime.NumCPU() * 2
if maxConcurrent > 32 {
maxConcurrent = 32
}
sem := make(chan struct{}, maxConcurrent)
for _, child := range children {
fullPath := filepath.Join(root, child.Name())
if child.IsDir() {
// Check if this is a folded directory
if shouldFoldDirWithPath(child.Name(), fullPath) {
// Use du for folded directories (much faster)
wg.Add(1)
go func(path string) {
defer wg.Done()
size := calculateDirSizeWithDu(path)
if size > 0 {
atomic.AddInt64(&total, size)
atomic.AddInt64(bytesScanned, size)
atomic.AddInt64(dirsScanned, 1)
}
}(fullPath)
continue
}
// Recursively scan subdirectory in parallel
wg.Add(1)
go func(path string) {
defer wg.Done()
sem <- struct{}{}
defer func() { <-sem }()
size := calculateDirSizeConcurrent(path, largeFileChan, filesScanned, dirsScanned, bytesScanned, currentPath)
atomic.AddInt64(&total, size)
atomic.AddInt64(dirsScanned, 1)
}(fullPath)
continue
}
// Handle files
info, err := child.Info()
if err != nil {
continue
}
size := getActualFileSize(fullPath, info)
atomic.AddInt64(&total, size) // subdirectory goroutines above also add to total, so file sizes must be added atomically as well
atomic.AddInt64(filesScanned, 1)
atomic.AddInt64(bytesScanned, size)
// Track large files
if !shouldSkipFileForLargeTracking(fullPath) && size >= minLargeFileSize {
largeFileChan <- fileEntry{name: child.Name(), path: fullPath, size: size}
}
// Update current path
if currentPath != nil {
*currentPath = fullPath
}
}
wg.Wait()
return total
}
// measureOverviewSize calculates the size of a directory using multiple strategies.
func measureOverviewSize(path string) (int64, error) {
if path == "" {
return 0, fmt.Errorf("empty path")
}
path = filepath.Clean(path)
if !filepath.IsAbs(path) {
return 0, fmt.Errorf("path must be absolute: %s", path)
}
if _, err := os.Stat(path); err != nil {
return 0, fmt.Errorf("cannot access path: %v", err)
}
if cached, err := loadStoredOverviewSize(path); err == nil && cached > 0 {
return cached, nil
}
if duSize, err := getDirectorySizeFromDu(path); err == nil && duSize > 0 {
_ = storeOverviewSize(path, duSize)
return duSize, nil
}
if logicalSize, err := getDirectoryLogicalSize(path); err == nil && logicalSize > 0 {
_ = storeOverviewSize(path, logicalSize)
return logicalSize, nil
}
if cached, err := loadCacheFromDisk(path); err == nil {
_ = storeOverviewSize(path, cached.TotalSize)
return cached.TotalSize, nil
}
return 0, fmt.Errorf("unable to measure directory size with fast methods")
}
func getDirectorySizeFromMetadata(path string) (int64, error) {
info, err := os.Stat(path)
if err != nil {
return 0, fmt.Errorf("cannot stat path: %v", err)
}
if !info.IsDir() {
return 0, fmt.Errorf("not a directory")
}
ctx, cancel := context.WithTimeout(context.Background(), mdlsTimeout)
defer cancel()
cmd := exec.CommandContext(ctx, "mdls", "-raw", "-name", "kMDItemFSSize", path)
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
if ctx.Err() == context.DeadlineExceeded {
return 0, fmt.Errorf("mdls timeout after %v", mdlsTimeout)
}
if stderr.Len() > 0 {
return 0, fmt.Errorf("mdls failed: %v (%s)", err, stderr.String())
}
return 0, fmt.Errorf("mdls failed: %v", err)
}
value := strings.TrimSpace(stdout.String())
if value == "" || value == "(null)" {
return 0, fmt.Errorf("metadata size unavailable")
}
size, err := strconv.ParseInt(value, 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse mdls output: %v", err)
}
if size <= 0 {
return 0, fmt.Errorf("mdls size invalid: %d", size)
}
return size, nil
}
func getDirectorySizeFromDu(path string) (int64, error) {
ctx, cancel := context.WithTimeout(context.Background(), duTimeout)
defer cancel()
cmd := exec.CommandContext(ctx, "du", "-sk", path)
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
if ctx.Err() == context.DeadlineExceeded {
return 0, fmt.Errorf("du timeout after %v", duTimeout)
}
if stderr.Len() > 0 {
return 0, fmt.Errorf("du failed: %v (%s)", err, stderr.String())
}
return 0, fmt.Errorf("du failed: %v", err)
}
fields := strings.Fields(stdout.String())
if len(fields) == 0 {
return 0, fmt.Errorf("du output empty")
}
kb, err := strconv.ParseInt(fields[0], 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse du output: %v", err)
}
if kb <= 0 {
return 0, fmt.Errorf("du size invalid: %d", kb)
}
return kb * 1024, nil
}
func getDirectoryLogicalSize(path string) (int64, error) {
var total int64
err := filepath.WalkDir(path, func(p string, d fs.DirEntry, err error) error {
if err != nil {
if os.IsPermission(err) {
return filepath.SkipDir
}
return nil
}
if d.IsDir() {
return nil
}
info, err := d.Info()
if err != nil {
return nil
}
total += getActualFileSize(p, info)
return nil
})
if err != nil && err != filepath.SkipDir {
return 0, err
}
return total, nil
}
func getActualFileSize(_ string, info fs.FileInfo) int64 {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return info.Size()
}
actualSize := stat.Blocks * 512
if actualSize < info.Size() {
return actualSize
}
return info.Size()
}
func getLastAccessTime(path string) time.Time {
info, err := os.Stat(path)
if err != nil {
return time.Time{}
}
return getLastAccessTimeFromInfo(info)
}
func getLastAccessTimeFromInfo(info fs.FileInfo) time.Time {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return time.Time{}
}
return time.Unix(stat.Atimespec.Sec, stat.Atimespec.Nsec)
}
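
Hedged sketch of how a caller might drive scanPathConcurrent behind the singleflight group declared above, so concurrent requests for the same root share one scan; scanResult comes from the suppressed main.go diff, and the counters would be polled by a progress view while the call blocks.

func runScan(root string) (scanResult, error) {
	var files, dirs, bytesScanned int64
	var current string
	// Duplicate requests for the same root coalesce into a single scan.
	v, err, _ := scanGroup.Do(root, func() (interface{}, error) {
		return scanPathConcurrent(root, &files, &dirs, &bytesScanned, &current)
	})
	if err != nil {
		return scanResult{}, err
	}
	return v.(scanResult), nil
}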


@@ -10,11 +10,11 @@ echo "Building analyze-go for multiple architectures..."
# Build for arm64 (Apple Silicon)
echo " → Building for arm64..."
-GOARCH=arm64 go build -ldflags="-s -w" -o bin/analyze-go-arm64 cmd/analyze/main.go
+GOARCH=arm64 go build -ldflags="-s -w" -o bin/analyze-go-arm64 ./cmd/analyze
# Build for amd64 (Intel)
echo " → Building for amd64..."
-GOARCH=amd64 go build -ldflags="-s -w" -o bin/analyze-go-amd64 cmd/analyze/main.go
+GOARCH=amd64 go build -ldflags="-s -w" -o bin/analyze-go-amd64 ./cmd/analyze
# Create Universal Binary
echo " → Creating Universal Binary..."