// Performance monitoring: metric snapshot types and the PerformanceMonitorService collector.
import Logger from '../utils/Logger'
/**
 * One complete performance snapshot, grouping every metric family that the
 * monitor collects at a single point in time.
 */
export interface PerformanceMetrics {
  /** Time the snapshot was taken, as produced by `Date.now()` (epoch milliseconds). */
  timestamp: number
  /** Process memory figures. */
  memoryUsage: MemoryMetrics
  /** Connection counts and churn. */
  connectionMetrics: ConnectionMetrics
  /** Message throughput, latency and error figures. */
  messageMetrics: MessageMetrics
  /** Process-level figures (uptime, CPU, event-loop lag). */
  systemMetrics: SystemMetrics
}
/**
 * Memory metrics for the current process.
 */
export interface MemoryMetrics {
  /** Heap currently in use, in MB. */
  heapUsed: number
  /** Total heap size, in MB. */
  heapTotal: number
  /** External memory, in MB. */
  external: number
  /** Resident set size, in MB. */
  rss: number
  /** `heapUsed` expressed as a percentage of `heapTotal`. */
  heapUsedPercent: number
}
/**
 * Connection metrics.
 */
export interface ConnectionMetrics {
  /** Total number of connections. */
  totalConnections: number
  /** Number of connections considered active. */
  activeConnections: number
  /** Number of connections considered idle. */
  idleConnections: number
  /** Connections opened during the current one-minute counting window. */
  newConnectionsPerMinute: number
  /** Connections closed during the current one-minute counting window. */
  disconnectionsPerMinute: number
}
/**
 * Message metrics.
 */
export interface MessageMetrics {
  /** Messages counted during the most recently completed one-second window. */
  messagesPerSecond: number
  /** Mean of the retained latency samples, in milliseconds. */
  averageLatency: number
  /** 95th-percentile latency of the retained samples, in milliseconds. */
  p95Latency: number
  /** 99th-percentile latency of the retained samples, in milliseconds. */
  p99Latency: number
  /** Errors as a percentage of messages. */
  errorRate: number
}
/**
 * System (process-level) metrics.
 */
export interface SystemMetrics {
  /** Process uptime, in seconds. */
  uptime: number
  /** CPU usage, as a percentage. */
  cpuUsage: number
  /** Measured event-loop lag, in milliseconds. */
  eventLoopLag: number
}
/**
 * Performance monitoring service.
 *
 * Collects process memory, message throughput/latency, connection churn,
 * CPU usage and event-loop lag. Construction immediately starts four
 * background timers:
 *  - every 1 s: roll the per-second message/error counters,
 *  - every 60 s: reset the per-minute connection counters,
 *  - every 10 s: capture a full snapshot into a bounded history and log it,
 *  - every 1 s: measure event-loop lag from timer drift.
 *
 * Call {@link destroy} when the service is no longer needed; otherwise the
 * timers keep running (and keep the Node.js process alive).
 */
export class PerformanceMonitorService {
  private logger = new Logger('PerformanceMonitor')

  // Snapshot history (bounded ring: oldest entry dropped first).
  private metrics: PerformanceMetrics[] = []
  private readonly MAX_METRICS_HISTORY = 60 // keep the most recent 60 snapshots

  // Message latency samples (bounded: oldest sample dropped first).
  private messageLatencies: number[] = []
  private readonly MAX_LATENCY_SAMPLES = 1000

  // Connection statistics.
  private connectionsPerMinute = 0
  private disconnectionsPerMinute = 0
  private totalConnections = 0
  private activeConnections = 0

  // Message statistics: "this second" accumulates, "last second" is what gets reported.
  private messagesThisSecond = 0
  private messagesLastSecond = 0
  private errorsThisSecond = 0
  private errorsLastSecond = 0

  // Event-loop monitoring.
  private eventLoopLag = 0

  // CPU sampling baseline: usage is reported as a delta between two samples.
  private cpuBaseline = process.cpuUsage()
  private cpuBaselineTime = process.hrtime()
  private lastCpuPercent = 0
  private readonly MIN_CPU_WINDOW_MS = 100

  // Handles of every running interval so destroy() can stop them.
  private timers: Array<ReturnType<typeof setInterval>> = []

  constructor() {
    this.startMonitoring()
  }

  /**
   * Records one message latency sample.
   *
   * Non-finite or negative values are ignored: a single NaN would otherwise
   * poison the average and make the percentile sort order undefined.
   *
   * @param latency Latency in milliseconds.
   */
  recordMessageLatency(latency: number): void {
    if (!Number.isFinite(latency) || latency < 0) return

    this.messageLatencies.push(latency)
    if (this.messageLatencies.length > this.MAX_LATENCY_SAMPLES) {
      this.messageLatencies.shift()
    }
  }

  /** Counts one processed message in the current one-second window. */
  recordMessage(): void {
    this.messagesThisSecond++
  }

  /** Counts one error in the current one-second window. */
  recordError(): void {
    this.errorsThisSecond++
  }

  /** Counts one newly opened connection in the current one-minute window. */
  recordConnection(): void {
    this.connectionsPerMinute++
  }

  /** Counts one closed connection in the current one-minute window. */
  recordDisconnection(): void {
    this.disconnectionsPerMinute++
  }

  /**
   * Supplies the current connection totals so that snapshots, logs and
   * reports show real numbers. Until this is called the totals are reported
   * as 0.
   *
   * @param total  Total number of open connections (clamped to a non-negative integer).
   * @param active Number of active connections; defaults to `total` and is clamped to `[0, total]`.
   */
  updateConnectionCounts(total: number, active: number = total): void {
    const safeTotal = Number.isFinite(total) ? Math.max(0, Math.floor(total)) : 0
    const safeActive = Number.isFinite(active) ? Math.max(0, Math.floor(active)) : 0
    this.totalConnections = safeTotal
    this.activeConnections = Math.min(safeActive, safeTotal)
  }

  /**
   * Builds a snapshot of the current metrics.
   *
   * Note: this also advances the CPU sampling baseline when at least
   * `MIN_CPU_WINDOW_MS` has passed since the previous sample.
   *
   * @returns A freshly built snapshot (not stored in the history).
   */
  getCurrentMetrics(): PerformanceMetrics {
    const BYTES_PER_MB = 1024 * 1024
    const memUsage = process.memoryUsage()
    const toMB = (bytes: number): number => Math.round(bytes / BYTES_PER_MB)

    // Percentage is derived from raw bytes (not the rounded MB values) and
    // guarded against a zero denominator.
    const heapUsedPercent = memUsage.heapTotal > 0
      ? Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)
      : 0

    const errorRate = this.messagesLastSecond > 0
      ? Math.round((this.errorsLastSecond / this.messagesLastSecond) * 100 * 100) / 100
      : 0

    const metrics: PerformanceMetrics = {
      timestamp: Date.now(),
      memoryUsage: {
        heapUsed: toMB(memUsage.heapUsed),
        heapTotal: toMB(memUsage.heapTotal),
        external: toMB(memUsage.external),
        rss: toMB(memUsage.rss),
        heapUsedPercent
      },
      connectionMetrics: {
        totalConnections: this.totalConnections,
        activeConnections: this.activeConnections,
        idleConnections: Math.max(0, this.totalConnections - this.activeConnections),
        newConnectionsPerMinute: this.connectionsPerMinute,
        disconnectionsPerMinute: this.disconnectionsPerMinute
      },
      messageMetrics: {
        messagesPerSecond: this.messagesLastSecond,
        averageLatency: this.calculateAverageLatency(),
        p95Latency: this.calculatePercentileLatency(95),
        p99Latency: this.calculatePercentileLatency(99),
        errorRate
      },
      systemMetrics: {
        uptime: Math.round(process.uptime()),
        cpuUsage: this.calculateCpuUsage(),
        eventLoopLag: this.eventLoopLag
      }
    }

    return metrics
  }

  /**
   * Mean of the retained latency samples, rounded to two decimals.
   *
   * @returns 0 when there are no samples.
   */
  private calculateAverageLatency(): number {
    const count = this.messageLatencies.length
    if (count === 0) return 0

    const sum = this.messageLatencies.reduce((acc, value) => acc + value, 0)
    return Math.round((sum / count) * 100) / 100
  }

  /**
   * Nearest-rank percentile of the retained latency samples.
   *
   * @param percentile Percentile to compute, e.g. 95 or 99.
   * @returns The sample at that rank, or 0 when there are no samples.
   */
  private calculatePercentileLatency(percentile: number): number {
    const count = this.messageLatencies.length
    if (count === 0) return 0

    const sorted = [...this.messageLatencies].sort((a, b) => a - b)
    // Multiply before dividing so integer percentiles yield an exact rank,
    // then clamp so out-of-range percentiles cannot index outside the array.
    const rank = Math.ceil((percentile * count) / 100) - 1
    const index = Math.min(count - 1, Math.max(0, rank))
    return sorted[index] ?? 0
  }

  /**
   * CPU usage of this process as a percentage of one core, measured over the
   * time elapsed since the previous sample (user + system CPU time divided by
   * wall-clock time). Can exceed 100 when the process uses several cores.
   *
   * If less than `MIN_CPU_WINDOW_MS` has elapsed, the previously computed
   * value is returned and the baseline is left untouched, because very short
   * windows give meaningless readings.
   *
   * @returns Percentage rounded to two decimals; 0 before the first full window.
   */
  private calculateCpuUsage(): number {
    const [elapsedSec, elapsedNano] = process.hrtime(this.cpuBaselineTime)
    const elapsedMs = elapsedSec * 1000 + elapsedNano / 1e6
    if (elapsedMs < this.MIN_CPU_WINDOW_MS) return this.lastCpuPercent

    const current = process.cpuUsage()
    // process.cpuUsage() reports microseconds.
    const busyMicros =
      (current.user - this.cpuBaseline.user) + (current.system - this.cpuBaseline.system)

    this.cpuBaseline = current
    this.cpuBaselineTime = process.hrtime()

    const percent = (Math.max(0, busyMicros) / (elapsedMs * 1000)) * 100
    this.lastCpuPercent = Math.round(percent * 100) / 100
    return this.lastCpuPercent
  }

  /**
   * Starts all background timers and remembers their handles so that
   * destroy() can stop them.
   */
  private startMonitoring(): void {
    // Every second: publish the finished window and start a new one.
    this.timers.push(setInterval(() => {
      this.messagesLastSecond = this.messagesThisSecond
      this.errorsLastSecond = this.errorsThisSecond
      this.messagesThisSecond = 0
      this.errorsThisSecond = 0
    }, 1000))

    // Every minute: reset the connection churn counters.
    this.timers.push(setInterval(() => {
      this.connectionsPerMinute = 0
      this.disconnectionsPerMinute = 0
    }, 60000))

    // Every 10 seconds: capture a full snapshot, trim the history, log it.
    this.timers.push(setInterval(() => {
      const snapshot = this.getCurrentMetrics()
      this.metrics.push(snapshot)

      if (this.metrics.length > this.MAX_METRICS_HISTORY) {
        this.metrics.shift()
      }

      this.logMetrics(snapshot)
    }, 10000))

    // Event-loop lag measurement.
    this.monitorEventLoopLag()
  }

  /**
   * Measures event-loop lag as the amount by which a 1-second interval
   * fires late (never negative).
   */
  private monitorEventLoopLag(): void {
    const expectedDelay = 1000 // 1 second
    let lastCheck = Date.now()

    this.timers.push(setInterval(() => {
      const now = Date.now()
      this.eventLoopLag = Math.max(0, now - lastCheck - expectedDelay)
      lastCheck = now
    }, expectedDelay))
  }

  /**
   * Writes a multi-line summary of a snapshot to the logger at info level.
   *
   * @param metrics Snapshot to log.
   */
  private logMetrics(metrics: PerformanceMetrics): void {
    const mem = metrics.memoryUsage
    const msg = metrics.messageMetrics
    const conn = metrics.connectionMetrics
    const sys = metrics.systemMetrics

    // Leading and trailing empty entries reproduce the blank first/last line
    // of the logged message.
    const lines = [
      '',
      `📊 性能指标 (${new Date(metrics.timestamp).toLocaleTimeString()}):`,
      `💾 内存: ${mem.heapUsed}MB / ${mem.heapTotal}MB (${mem.heapUsedPercent}%) | RSS: ${mem.rss}MB`,
      `📨 消息: ${msg.messagesPerSecond}/s | 延迟: ${msg.averageLatency}ms (p95: ${msg.p95Latency}ms, p99: ${msg.p99Latency}ms) | 错误率: ${msg.errorRate}%`,
      `🔌 连接: ${conn.totalConnections}个 (活跃: ${conn.activeConnections}, 空闲: ${conn.idleConnections}) | 新增: ${conn.newConnectionsPerMinute}/min`,
      `⚙️ 系统: 运行时间 ${sys.uptime}s | CPU: ${sys.cpuUsage}% | 事件循环延迟: ${sys.eventLoopLag}ms`,
      ''
    ]

    this.logger.info(lines.join('\n'))
  }

  /**
   * Returns the most recent snapshots from the history, oldest first.
   *
   * @param limit Maximum number of snapshots to return (default 10).
   *              Zero, negative or NaN yields an empty array.
   * @returns A new array; the internal history is not exposed.
   */
  getMetricsHistory(limit: number = 10): PerformanceMetrics[] {
    const count = Math.floor(limit)
    // slice(-0) would return the whole history, so handle non-positive
    // limits explicitly.
    if (!(count > 0)) return []
    return this.metrics.slice(-count)
  }

  /**
   * Evaluates the latest stored snapshot against fixed thresholds:
   * heap usage > 80 %, p99 latency > 500 ms, error rate > 5 %,
   * event-loop lag > 100 ms.
   *
   * @returns One message per exceeded threshold; empty when there is no
   *          snapshot yet or nothing is exceeded.
   */
  getPerformanceWarnings(): string[] {
    const warnings: string[] = []
    const latest = this.metrics[this.metrics.length - 1]

    if (!latest) return warnings

    // Memory
    if (latest.memoryUsage.heapUsedPercent > 80) {
      warnings.push(`⚠️ 内存使用过高: ${latest.memoryUsage.heapUsedPercent}%`)
    }

    // Latency
    if (latest.messageMetrics.p99Latency > 500) {
      warnings.push(`⚠️ 消息延迟过高: P99=${latest.messageMetrics.p99Latency}ms`)
    }

    // Error rate
    if (latest.messageMetrics.errorRate > 5) {
      warnings.push(`⚠️ 错误率过高: ${latest.messageMetrics.errorRate}%`)
    }

    // Event-loop lag
    if (latest.systemMetrics.eventLoopLag > 100) {
      warnings.push(`⚠️ 事件循环延迟过高: ${latest.systemMetrics.eventLoopLag}ms`)
    }

    return warnings
  }

  /**
   * Builds a human-readable report from the latest stored snapshot,
   * followed by either the active warnings or an "all good" line.
   *
   * @returns The report text, or '暂无数据' when no snapshot has been stored yet.
   */
  getPerformanceReport(): string {
    const latest = this.metrics[this.metrics.length - 1]
    if (!latest) return '暂无数据'

    const warnings = this.getPerformanceWarnings()

    const lines = [
      '📈 性能报告',
      '='.repeat(50),
      `时间: ${new Date(latest.timestamp).toLocaleString()}`,
      `内存: ${latest.memoryUsage.heapUsed}MB / ${latest.memoryUsage.heapTotal}MB`,
      `消息吞吐: ${latest.messageMetrics.messagesPerSecond}/s`,
      `平均延迟: ${latest.messageMetrics.averageLatency}ms`,
      `连接数: ${latest.connectionMetrics.totalConnections}`,
      `运行时间: ${latest.systemMetrics.uptime}s`,
      '' // blank separator line before the status section
    ]

    if (warnings.length > 0) {
      lines.push('⚠️ 警告:')
      for (const warning of warnings) {
        lines.push(` ${warning}`)
      }
    } else {
      lines.push('✅ 系统运行正常')
    }

    return lines.join('\n') + '\n'
  }

  /**
   * Releases all resources: stops every background timer and drops the
   * collected history and latency samples. Safe to call more than once.
   */
  destroy(): void {
    for (const timer of this.timers) {
      clearInterval(timer)
    }
    this.timers = []
    this.metrics = []
    this.messageLatencies = []
  }
}