mirror of
https://github.com/tw93/Mole.git
synced 2026-03-24 05:30:07 +00:00
feat(status): alert on persistent high-cpu processes (#602)
* feat(status): alert on persistent high-cpu processes * refactor(status): keep high-cpu alerts read-only * fix(status): address lint and sudo test regressions --------- Co-authored-by: Tw93 <hitw93@gmail.com>
This commit is contained in:
@@ -66,19 +66,21 @@ type MetricsSnapshot struct {
|
||||
HealthScore int `json:"health_score"` // 0-100 system health score
|
||||
HealthScoreMsg string `json:"health_score_msg"` // Brief explanation
|
||||
|
||||
CPU CPUStatus `json:"cpu"`
|
||||
GPU []GPUStatus `json:"gpu"`
|
||||
Memory MemoryStatus `json:"memory"`
|
||||
Disks []DiskStatus `json:"disks"`
|
||||
DiskIO DiskIOStatus `json:"disk_io"`
|
||||
Network []NetworkStatus `json:"network"`
|
||||
NetworkHistory NetworkHistory `json:"network_history"`
|
||||
Proxy ProxyStatus `json:"proxy"`
|
||||
Batteries []BatteryStatus `json:"batteries"`
|
||||
Thermal ThermalStatus `json:"thermal"`
|
||||
Sensors []SensorReading `json:"sensors"`
|
||||
Bluetooth []BluetoothDevice `json:"bluetooth"`
|
||||
TopProcesses []ProcessInfo `json:"top_processes"`
|
||||
CPU CPUStatus `json:"cpu"`
|
||||
GPU []GPUStatus `json:"gpu"`
|
||||
Memory MemoryStatus `json:"memory"`
|
||||
Disks []DiskStatus `json:"disks"`
|
||||
DiskIO DiskIOStatus `json:"disk_io"`
|
||||
Network []NetworkStatus `json:"network"`
|
||||
NetworkHistory NetworkHistory `json:"network_history"`
|
||||
Proxy ProxyStatus `json:"proxy"`
|
||||
Batteries []BatteryStatus `json:"batteries"`
|
||||
Thermal ThermalStatus `json:"thermal"`
|
||||
Sensors []SensorReading `json:"sensors"`
|
||||
Bluetooth []BluetoothDevice `json:"bluetooth"`
|
||||
TopProcesses []ProcessInfo `json:"top_processes"`
|
||||
ProcessWatch ProcessWatchConfig `json:"process_watch"`
|
||||
ProcessAlerts []ProcessAlert `json:"process_alerts"`
|
||||
}
|
||||
|
||||
type HardwareInfo struct {
|
||||
@@ -96,9 +98,12 @@ type DiskIOStatus struct {
|
||||
}
|
||||
|
||||
type ProcessInfo struct {
|
||||
Name string `json:"name"`
|
||||
CPU float64 `json:"cpu"`
|
||||
Memory float64 `json:"memory"`
|
||||
PID int `json:"pid"`
|
||||
PPID int `json:"ppid"`
|
||||
Name string `json:"name"`
|
||||
Command string `json:"command"`
|
||||
CPU float64 `json:"cpu"`
|
||||
Memory float64 `json:"memory"`
|
||||
}
|
||||
|
||||
type CPUStatus struct {
|
||||
@@ -215,13 +220,19 @@ type Collector struct {
|
||||
cachedGPU []GPUStatus
|
||||
prevDiskIO disk.IOCountersStat
|
||||
lastDiskAt time.Time
|
||||
|
||||
watchMu sync.Mutex
|
||||
processWatch ProcessWatchConfig
|
||||
processWatcher *ProcessWatcher
|
||||
}
|
||||
|
||||
func NewCollector() *Collector {
|
||||
func NewCollector(options ProcessWatchOptions) *Collector {
|
||||
return &Collector{
|
||||
prevNet: make(map[string]net.IOCountersStat),
|
||||
rxHistoryBuf: NewRingBuffer(NetworkHistorySize),
|
||||
txHistoryBuf: NewRingBuffer(NetworkHistorySize),
|
||||
prevNet: make(map[string]net.IOCountersStat),
|
||||
rxHistoryBuf: NewRingBuffer(NetworkHistorySize),
|
||||
txHistoryBuf: NewRingBuffer(NetworkHistorySize),
|
||||
processWatch: options.SnapshotConfig(),
|
||||
processWatcher: NewProcessWatcher(options),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -250,7 +261,7 @@ func (c *Collector) Collect() (MetricsSnapshot, error) {
|
||||
sensorStats []SensorReading
|
||||
gpuStats []GPUStatus
|
||||
btStats []BluetoothDevice
|
||||
topProcs []ProcessInfo
|
||||
allProcs []ProcessInfo
|
||||
)
|
||||
|
||||
// Helper to launch concurrent collection.
|
||||
@@ -303,7 +314,7 @@ func (c *Collector) Collect() (MetricsSnapshot, error) {
|
||||
}
|
||||
return nil
|
||||
})
|
||||
collect(func() (err error) { topProcs = collectTopProcesses(); return nil })
|
||||
collect(func() (err error) { allProcs, err = collectProcesses(); return })
|
||||
|
||||
// Wait for all to complete.
|
||||
wg.Wait()
|
||||
@@ -318,6 +329,14 @@ func (c *Collector) Collect() (MetricsSnapshot, error) {
|
||||
hwInfo := c.cachedHW
|
||||
|
||||
score, scoreMsg := calculateHealthScore(cpuStats, memStats, diskStats, diskIO, thermalStats)
|
||||
topProcs := topProcesses(allProcs, 5)
|
||||
|
||||
var processAlerts []ProcessAlert
|
||||
c.watchMu.Lock()
|
||||
if c.processWatcher != nil {
|
||||
processAlerts = c.processWatcher.Update(now, allProcs)
|
||||
}
|
||||
c.watchMu.Unlock()
|
||||
|
||||
return MetricsSnapshot{
|
||||
CollectedAt: now,
|
||||
@@ -338,12 +357,14 @@ func (c *Collector) Collect() (MetricsSnapshot, error) {
|
||||
RxHistory: c.rxHistoryBuf.Slice(),
|
||||
TxHistory: c.txHistoryBuf.Slice(),
|
||||
},
|
||||
Proxy: proxyStats,
|
||||
Batteries: batteryStats,
|
||||
Thermal: thermalStats,
|
||||
Sensors: sensorStats,
|
||||
Bluetooth: btStats,
|
||||
TopProcesses: topProcs,
|
||||
Proxy: proxyStats,
|
||||
Batteries: batteryStats,
|
||||
Thermal: thermalStats,
|
||||
Sensors: sensorStats,
|
||||
Bluetooth: btStats,
|
||||
TopProcesses: topProcs,
|
||||
ProcessWatch: c.processWatch,
|
||||
ProcessAlerts: processAlerts,
|
||||
}, mergeErr
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user