Files
server/src/services/PerformanceMonitorService.ts
wdvipa 450367dea2 111
2026-02-09 16:34:01 +08:00

342 lines
8.9 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import Logger from '../utils/Logger'
/**
* 性能指标接口
*/
export interface PerformanceMetrics {
timestamp: number
memoryUsage: MemoryMetrics
connectionMetrics: ConnectionMetrics
messageMetrics: MessageMetrics
systemMetrics: SystemMetrics
}
/**
* 内存指标
*/
export interface MemoryMetrics {
heapUsed: number // MB
heapTotal: number // MB
external: number // MB
rss: number // MB
heapUsedPercent: number
}
/**
* 连接指标
*/
export interface ConnectionMetrics {
totalConnections: number
activeConnections: number
idleConnections: number
newConnectionsPerMinute: number
disconnectionsPerMinute: number
}
/**
* 消息指标
*/
export interface MessageMetrics {
messagesPerSecond: number
averageLatency: number // ms
p95Latency: number // ms
p99Latency: number // ms
errorRate: number // %
}
/**
* 系统指标
*/
export interface SystemMetrics {
uptime: number // seconds
cpuUsage: number // %
eventLoopLag: number // ms
}
/**
* 性能监控服务
*/
export class PerformanceMonitorService {
private logger = new Logger('PerformanceMonitor')
// 指标收集
private metrics: PerformanceMetrics[] = []
private readonly MAX_METRICS_HISTORY = 60 // 保留最近60条记录
// 消息延迟追踪
private messageLatencies: number[] = []
private readonly MAX_LATENCY_SAMPLES = 1000
// 连接统计
private connectionsPerMinute = 0
private disconnectionsPerMinute = 0
private lastConnectionCount = 0
// 消息统计
private messagesThisSecond = 0
private messagesLastSecond = 0
private errorsThisSecond = 0
private errorsLastSecond = 0
// 事件循环监控
private lastEventLoopCheck = Date.now()
private eventLoopLag = 0
constructor() {
this.startMonitoring()
}
/**
* 记录消息延迟
*/
recordMessageLatency(latency: number): void {
this.messageLatencies.push(latency)
if (this.messageLatencies.length > this.MAX_LATENCY_SAMPLES) {
this.messageLatencies.shift()
}
}
/**
* 记录消息
*/
recordMessage(): void {
this.messagesThisSecond++
}
/**
* 记录错误
*/
recordError(): void {
this.errorsThisSecond++
}
/**
* 记录连接
*/
recordConnection(): void {
this.connectionsPerMinute++
}
/**
* 记录断开连接
*/
recordDisconnection(): void {
this.disconnectionsPerMinute++
}
/**
* 获取当前性能指标
*/
getCurrentMetrics(): PerformanceMetrics {
const memUsage = process.memoryUsage()
const heapUsedMB = Math.round(memUsage.heapUsed / 1024 / 1024)
const heapTotalMB = Math.round(memUsage.heapTotal / 1024 / 1024)
const externalMB = Math.round(memUsage.external / 1024 / 1024)
const rssMB = Math.round(memUsage.rss / 1024 / 1024)
const metrics: PerformanceMetrics = {
timestamp: Date.now(),
memoryUsage: {
heapUsed: heapUsedMB,
heapTotal: heapTotalMB,
external: externalMB,
rss: rssMB,
heapUsedPercent: Math.round((heapUsedMB / heapTotalMB) * 100)
},
connectionMetrics: {
totalConnections: 0, // 由调用者设置
activeConnections: 0,
idleConnections: 0,
newConnectionsPerMinute: this.connectionsPerMinute,
disconnectionsPerMinute: this.disconnectionsPerMinute
},
messageMetrics: {
messagesPerSecond: this.messagesLastSecond,
averageLatency: this.calculateAverageLatency(),
p95Latency: this.calculatePercentileLatency(95),
p99Latency: this.calculatePercentileLatency(99),
errorRate: this.messagesLastSecond > 0
? Math.round((this.errorsLastSecond / this.messagesLastSecond) * 100 * 100) / 100
: 0
},
systemMetrics: {
uptime: Math.round(process.uptime()),
cpuUsage: this.calculateCpuUsage(),
eventLoopLag: this.eventLoopLag
}
}
return metrics
}
/**
* 计算平均延迟
*/
private calculateAverageLatency(): number {
if (this.messageLatencies.length === 0) return 0
const sum = this.messageLatencies.reduce((a, b) => a + b, 0)
return Math.round(sum / this.messageLatencies.length * 100) / 100
}
/**
* 计算百分位延迟
*/
private calculatePercentileLatency(percentile: number): number {
if (this.messageLatencies.length === 0) return 0
const sorted = [...this.messageLatencies].sort((a, b) => a - b)
const index = Math.ceil((percentile / 100) * sorted.length) - 1
return sorted[Math.max(0, index)]
}
/**
* 计算CPU使用率 (简化版)
*/
private calculateCpuUsage(): number {
// 这是一个简化的实现,实际应该使用 os.cpus() 或专门的库
const usage = process.cpuUsage()
return Math.round((usage.user + usage.system) / 1000000 * 100) / 100
}
/**
* 启动监控任务
*/
private startMonitoring(): void {
// 每秒更新消息统计
setInterval(() => {
this.messagesLastSecond = this.messagesThisSecond
this.errorsLastSecond = this.errorsThisSecond
this.messagesThisSecond = 0
this.errorsThisSecond = 0
}, 1000)
// 每分钟重置连接统计
setInterval(() => {
this.connectionsPerMinute = 0
this.disconnectionsPerMinute = 0
}, 60000)
// 每10秒收集一次完整指标
setInterval(() => {
const metrics = this.getCurrentMetrics()
this.metrics.push(metrics)
if (this.metrics.length > this.MAX_METRICS_HISTORY) {
this.metrics.shift()
}
this.logMetrics(metrics)
}, 10000)
// 监控事件循环延迟
this.monitorEventLoopLag()
}
/**
* 监控事件循环延迟
*/
private monitorEventLoopLag(): void {
let lastCheck = Date.now()
setInterval(() => {
const now = Date.now()
const expectedDelay = 1000 // 1秒
const actualDelay = now - lastCheck
this.eventLoopLag = Math.max(0, actualDelay - expectedDelay)
lastCheck = now
}, 1000)
}
/**
* 输出指标日志
*/
private logMetrics(metrics: PerformanceMetrics): void {
const mem = metrics.memoryUsage
const msg = metrics.messageMetrics
const conn = metrics.connectionMetrics
const sys = metrics.systemMetrics
this.logger.info(`
📊 性能指标 (${new Date(metrics.timestamp).toLocaleTimeString()}):
💾 内存: ${mem.heapUsed}MB / ${mem.heapTotal}MB (${mem.heapUsedPercent}%) | RSS: ${mem.rss}MB
📨 消息: ${msg.messagesPerSecond}/s | 延迟: ${msg.averageLatency}ms (p95: ${msg.p95Latency}ms, p99: ${msg.p99Latency}ms) | 错误率: ${msg.errorRate}%
🔌 连接: ${conn.totalConnections}个 (活跃: ${conn.activeConnections}, 空闲: ${conn.idleConnections}) | 新增: ${conn.newConnectionsPerMinute}/min
⚙️ 系统: 运行时间 ${sys.uptime}s | CPU: ${sys.cpuUsage}% | 事件循环延迟: ${sys.eventLoopLag}ms
`)
}
/**
* 获取历史指标
*/
getMetricsHistory(limit: number = 10): PerformanceMetrics[] {
return this.metrics.slice(-limit)
}
/**
* 获取性能警告
*/
getPerformanceWarnings(): string[] {
const warnings: string[] = []
const latest = this.metrics[this.metrics.length - 1]
if (!latest) return warnings
// 内存警告
if (latest.memoryUsage.heapUsedPercent > 80) {
warnings.push(`⚠️ 内存使用过高: ${latest.memoryUsage.heapUsedPercent}%`)
}
// 延迟警告
if (latest.messageMetrics.p99Latency > 500) {
warnings.push(`⚠️ 消息延迟过高: P99=${latest.messageMetrics.p99Latency}ms`)
}
// 错误率警告
if (latest.messageMetrics.errorRate > 5) {
warnings.push(`⚠️ 错误率过高: ${latest.messageMetrics.errorRate}%`)
}
// 事件循环延迟警告
if (latest.systemMetrics.eventLoopLag > 100) {
warnings.push(`⚠️ 事件循环延迟过高: ${latest.systemMetrics.eventLoopLag}ms`)
}
return warnings
}
/**
* 获取性能报告
*/
getPerformanceReport(): string {
const warnings = this.getPerformanceWarnings()
const latest = this.metrics[this.metrics.length - 1]
if (!latest) return '暂无数据'
let report = '📈 性能报告\n'
report += '='.repeat(50) + '\n'
report += `时间: ${new Date(latest.timestamp).toLocaleString()}\n`
report += `内存: ${latest.memoryUsage.heapUsed}MB / ${latest.memoryUsage.heapTotal}MB\n`
report += `消息吞吐: ${latest.messageMetrics.messagesPerSecond}/s\n`
report += `平均延迟: ${latest.messageMetrics.averageLatency}ms\n`
report += `连接数: ${latest.connectionMetrics.totalConnections}\n`
report += `运行时间: ${latest.systemMetrics.uptime}s\n`
if (warnings.length > 0) {
report += '\n⚠ 警告:\n'
warnings.forEach(w => report += ` ${w}\n`)
} else {
report += '\n✅ 系统运行正常\n'
}
return report
}
/**
* 清理资源
*/
destroy(): void {
this.metrics = []
this.messageLatencies = []
}
}