111

2026-02-09 16:34:01 +08:00
commit 450367dea2
92 changed files with 36822 additions and 0 deletions
--- a/src/services/PerformanceMonitorService.ts
+++ b/src/services/PerformanceMonitorService.ts
@@ -0,0 +1,341 @@
+import Logger from '../utils/Logger'
+
+/**
+ * One complete performance snapshot: a timestamp plus every metric group.
+ */
+export interface PerformanceMetrics {
+  timestamp: number  // epoch milliseconds; the service fills this from Date.now()
+  memoryUsage: MemoryMetrics  // process memory figures
+  connectionMetrics: ConnectionMetrics  // connection counts and churn
+  messageMetrics: MessageMetrics  // message throughput, latency and error rate
+  systemMetrics: SystemMetrics  // uptime, CPU and event-loop lag
+}
+
+/**
+ * Memory metrics, derived by the service from process.memoryUsage().
+ */
+export interface MemoryMetrics {
+  heapUsed: number      // MB, rounded to a whole number
+  heapTotal: number     // MB, rounded to a whole number
+  external: number      // MB, rounded to a whole number
+  rss: number          // MB, rounded to a whole number
+  heapUsedPercent: number  // heapUsed as a percentage of heapTotal, rounded to an integer
+}
+
+/**
+ * Connection metrics.
+ */
+export interface ConnectionMetrics {
+  totalConnections: number  // getCurrentMetrics() leaves this at 0; the caller is expected to set it
+  activeConnections: number  // reported as 0 by getCurrentMetrics(); presumably caller-supplied as well
+  idleConnections: number  // reported as 0 by getCurrentMetrics(); presumably caller-supplied as well
+  newConnectionsPerMinute: number  // recordConnection() calls since the once-a-minute counter reset
+  disconnectionsPerMinute: number  // recordDisconnection() calls since the once-a-minute counter reset
+}
+
+/**
+ * Message metrics.
+ */
+export interface MessageMetrics {
+  messagesPerSecond: number  // messages counted during the previous one-second window
+  averageLatency: number  // ms, mean of the retained latency samples (2 decimals)
+  p95Latency: number      // ms, 95th percentile of the retained latency samples
+  p99Latency: number      // ms, 99th percentile of the retained latency samples
+  errorRate: number       // %, errors / messages over the previous second; 0 when there were no messages
+}
+
+/**
+ * System metrics.
+ */
+export interface SystemMetrics {
+  uptime: number          // seconds, process.uptime() rounded to a whole number
+  cpuUsage: number        // percent, as produced by the service's calculateCpuUsage()
+  eventLoopLag: number    // ms, how late the service's one-second check timer fired
+}
+
+/**
+ * Performance monitoring service.
+ *
+ * Collects process memory, message throughput/latency, connection churn and
+ * event-loop lag on background intervals, keeps a short rolling history of
+ * snapshots, and derives warnings and a text report from the newest snapshot.
+ *
+ * Constructing an instance starts the background timers immediately. Call
+ * `destroy()` when the instance is no longer needed; otherwise the timers keep
+ * running (and keep logging) for the lifetime of the process.
+ */
+export class PerformanceMonitorService {
+  private logger = new Logger('PerformanceMonitor')
+
+  // Rolling history of collected snapshots (one is appended every 10 seconds).
+  private metrics: PerformanceMetrics[] = []
+  private readonly MAX_METRICS_HISTORY = 60  // keep the 60 most recent snapshots
+
+  // Most recent message latency samples, in milliseconds.
+  private messageLatencies: number[] = []
+  private readonly MAX_LATENCY_SAMPLES = 1000
+
+  // Connection counters; both are zeroed by a timer once per minute.
+  private connectionsPerMinute = 0
+  private disconnectionsPerMinute = 0
+
+  // Message counters: "this second" accumulates, "last second" is the published value.
+  private messagesThisSecond = 0
+  private messagesLastSecond = 0
+  private errorsThisSecond = 0
+  private errorsLastSecond = 0
+
+  // Latest measured event-loop lag, in milliseconds.
+  private eventLoopLag = 0
+
+  // CPU sampling state, used to turn cumulative CPU time into a percentage.
+  private lastCpuUsage: ReturnType<typeof process.cpuUsage> | null = null
+  private lastCpuSampleTime = 0
+  private lastCpuPercent = 0
+  private readonly MIN_CPU_SAMPLE_WINDOW_MS = 100
+
+  // Handles of every interval started by this instance, so destroy() can stop them.
+  private timers: ReturnType<typeof setInterval>[] = []
+
+  constructor() {
+    this.startMonitoring()
+  }
+
+  /**
+   * Records one message latency sample.
+   *
+   * Only the newest `MAX_LATENCY_SAMPLES` samples are retained. Non-finite
+   * values (NaN, ±Infinity) are ignored because a single one would corrupt
+   * both the average and the percentile ordering.
+   *
+   * @param latency Latency of one message in milliseconds.
+   */
+  recordMessageLatency(latency: number): void {
+    if (!Number.isFinite(latency)) return
+
+    this.messageLatencies.push(latency)
+    if (this.messageLatencies.length > this.MAX_LATENCY_SAMPLES) {
+      this.messageLatencies.shift()
+    }
+  }
+
+  /**
+   * Counts one processed message in the current one-second window.
+   */
+  recordMessage(): void {
+    this.messagesThisSecond++
+  }
+
+  /**
+   * Counts one error in the current one-second window.
+   */
+  recordError(): void {
+    this.errorsThisSecond++
+  }
+
+  /**
+   * Counts one new connection in the current one-minute window.
+   */
+  recordConnection(): void {
+    this.connectionsPerMinute++
+  }
+
+  /**
+   * Counts one disconnection in the current one-minute window.
+   */
+  recordDisconnection(): void {
+    this.disconnectionsPerMinute++
+  }
+
+  /**
+   * Builds a snapshot of the current metrics.
+   *
+   * `totalConnections`, `activeConnections` and `idleConnections` are always
+   * returned as 0; the caller is expected to fill them in.
+   *
+   * @returns A freshly built snapshot; it is not added to the history.
+   */
+  getCurrentMetrics(): PerformanceMetrics {
+    const BYTES_PER_MB = 1024 * 1024
+    const toMB = (bytes: number): number => Math.round(bytes / BYTES_PER_MB)
+    const memUsage = process.memoryUsage()
+
+    return {
+      timestamp: Date.now(),
+      memoryUsage: {
+        heapUsed: toMB(memUsage.heapUsed),
+        heapTotal: toMB(memUsage.heapTotal),
+        external: toMB(memUsage.external),
+        rss: toMB(memUsage.rss),
+        // Use raw byte counts: dividing the rounded MB values loses precision
+        // on small heaps and would divide by zero if heapTotal rounded to 0.
+        heapUsedPercent: memUsage.heapTotal > 0
+          ? Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)
+          : 0
+      },
+      connectionMetrics: {
+        totalConnections: 0,  // set by the caller
+        activeConnections: 0,
+        idleConnections: 0,
+        newConnectionsPerMinute: this.connectionsPerMinute,
+        disconnectionsPerMinute: this.disconnectionsPerMinute
+      },
+      messageMetrics: {
+        messagesPerSecond: this.messagesLastSecond,
+        averageLatency: this.calculateAverageLatency(),
+        p95Latency: this.calculatePercentileLatency(95),
+        p99Latency: this.calculatePercentileLatency(99),
+        // Percentage with two decimals; 0 when no messages were seen.
+        errorRate: this.messagesLastSecond > 0
+          ? Math.round((this.errorsLastSecond / this.messagesLastSecond) * 100 * 100) / 100
+          : 0
+      },
+      systemMetrics: {
+        uptime: Math.round(process.uptime()),
+        cpuUsage: this.calculateCpuUsage(),
+        eventLoopLag: this.eventLoopLag
+      }
+    }
+  }
+
+  /**
+   * Mean of the retained latency samples, rounded to two decimals.
+   *
+   * @returns The mean in milliseconds, or 0 when there are no samples.
+   */
+  private calculateAverageLatency(): number {
+    if (this.messageLatencies.length === 0) return 0
+    const sum = this.messageLatencies.reduce((a, b) => a + b, 0)
+    return Math.round(sum / this.messageLatencies.length * 100) / 100
+  }
+
+  /**
+   * Nearest-rank percentile of the retained latency samples.
+   *
+   * @param percentile Percentile to compute, normally in the range 0-100.
+   * @returns The sample at that rank, or 0 when there are no samples.
+   */
+  private calculatePercentileLatency(percentile: number): number {
+    if (this.messageLatencies.length === 0) return 0
+    const sorted = [...this.messageLatencies].sort((a, b) => a - b)
+    const rank = Math.ceil((percentile / 100) * sorted.length) - 1
+    // Clamp at both ends so out-of-range percentiles still return a sample.
+    const index = Math.min(sorted.length - 1, Math.max(0, rank))
+    return sorted[index]
+  }
+
+  /**
+   * CPU usage of this process as a percentage of one core.
+   *
+   * `process.cpuUsage()` reports cumulative CPU time, so the percentage is the
+   * CPU time consumed since the previous sample divided by the wall-clock time
+   * that passed. The first call averages over the whole process lifetime.
+   * Calls closer together than `MIN_CPU_SAMPLE_WINDOW_MS` reuse the previous
+   * value, because such a short window gives a meaningless ratio. The result
+   * can exceed 100 when the process keeps more than one core busy.
+   *
+   * @returns CPU usage in percent, rounded to two decimals.
+   */
+  private calculateCpuUsage(): number {
+    const now = Date.now()
+    const current = process.cpuUsage()  // cumulative, in microseconds
+
+    if (!this.lastCpuUsage) {
+      // No earlier sample: average over the process lifetime instead.
+      const lifetimeMicros = process.uptime() * 1e6
+      const busyMicros = current.user + current.system
+      this.lastCpuPercent = lifetimeMicros > 0
+        ? Math.round((busyMicros / lifetimeMicros) * 100 * 100) / 100
+        : 0
+    } else {
+      const elapsedMs = now - this.lastCpuSampleTime
+      if (elapsedMs < this.MIN_CPU_SAMPLE_WINDOW_MS) {
+        return this.lastCpuPercent
+      }
+      const busyMicros =
+        (current.user - this.lastCpuUsage.user) + (current.system - this.lastCpuUsage.system)
+      this.lastCpuPercent = Math.round((busyMicros / (elapsedMs * 1000)) * 100 * 100) / 100
+    }
+
+    this.lastCpuUsage = current
+    this.lastCpuSampleTime = now
+    return this.lastCpuPercent
+  }
+
+  /**
+   * Starts the background timers. Every handle is remembered so that
+   * `destroy()` can stop them.
+   */
+  private startMonitoring(): void {
+    // Every second: publish the message/error counts and start a new window.
+    this.timers.push(setInterval(() => {
+      this.messagesLastSecond = this.messagesThisSecond
+      this.errorsLastSecond = this.errorsThisSecond
+      this.messagesThisSecond = 0
+      this.errorsThisSecond = 0
+    }, 1000))
+
+    // Every minute: reset the connection counters.
+    this.timers.push(setInterval(() => {
+      this.connectionsPerMinute = 0
+      this.disconnectionsPerMinute = 0
+    }, 60000))
+
+    // Every 10 seconds: collect a full snapshot, store it and log it.
+    this.timers.push(setInterval(() => {
+      const metrics = this.getCurrentMetrics()
+      this.metrics.push(metrics)
+
+      if (this.metrics.length > this.MAX_METRICS_HISTORY) {
+        this.metrics.shift()
+      }
+
+      this.logMetrics(metrics)
+    }, 10000))
+
+    // Event-loop lag sampling.
+    this.monitorEventLoopLag()
+  }
+
+  /**
+   * Measures event-loop lag as the amount by which a one-second interval
+   * fires late.
+   */
+  private monitorEventLoopLag(): void {
+    const expectedDelay = 1000  // 1 second
+    let lastCheck = Date.now()
+
+    this.timers.push(setInterval(() => {
+      const now = Date.now()
+      // An early tick must not be reported as negative lag.
+      this.eventLoopLag = Math.max(0, now - lastCheck - expectedDelay)
+      lastCheck = now
+    }, expectedDelay))
+  }
+
+  /**
+   * Writes one snapshot to the logger as a multi-line summary.
+   *
+   * @param metrics Snapshot to log.
+   */
+  private logMetrics(metrics: PerformanceMetrics): void {
+    const mem = metrics.memoryUsage
+    const msg = metrics.messageMetrics
+    const conn = metrics.connectionMetrics
+    const sys = metrics.systemMetrics
+
+    this.logger.info(`
+📊 性能指标 (${new Date(metrics.timestamp).toLocaleTimeString()}):
+  💾 内存: ${mem.heapUsed}MB / ${mem.heapTotal}MB (${mem.heapUsedPercent}%) | RSS: ${mem.rss}MB
+  📨 消息: ${msg.messagesPerSecond}/s | 延迟: ${msg.averageLatency}ms (p95: ${msg.p95Latency}ms, p99: ${msg.p99Latency}ms) | 错误率: ${msg.errorRate}%
+  🔌 连接: ${conn.totalConnections}个 (活跃: ${conn.activeConnections}, 空闲: ${conn.idleConnections}) | 新增: ${conn.newConnectionsPerMinute}/min
+  ⚙️  系统: 运行时间 ${sys.uptime}s | CPU: ${sys.cpuUsage}% | 事件循环延迟: ${sys.eventLoopLag}ms
+    `)
+  }
+
+  /**
+   * Returns the most recent snapshots from the history, oldest first.
+   *
+   * @param limit Maximum number of snapshots to return (default 10). Zero,
+   *   negative and NaN limits return an empty array; without that guard
+   *   `slice(-0)` would return the whole history.
+   * @returns A new array holding at most `limit` snapshots.
+   */
+  getMetricsHistory(limit: number = 10): PerformanceMetrics[] {
+    const count = Math.floor(limit)
+    if (!(count > 0)) return []
+    return this.metrics.slice(-count)
+  }
+
+  /**
+   * Checks the newest stored snapshot against fixed thresholds: heap above
+   * 80%, p99 latency above 500 ms, error rate above 5%, event-loop lag above
+   * 100 ms.
+   *
+   * @returns One message per exceeded threshold; empty when nothing is
+   *   exceeded or no snapshot has been collected yet.
+   */
+  getPerformanceWarnings(): string[] {
+    const warnings: string[] = []
+    const latest = this.metrics[this.metrics.length - 1]
+
+    if (!latest) return warnings
+
+    // Memory
+    if (latest.memoryUsage.heapUsedPercent > 80) {
+      warnings.push(`⚠️ 内存使用过高: ${latest.memoryUsage.heapUsedPercent}%`)
+    }
+
+    // Latency
+    if (latest.messageMetrics.p99Latency > 500) {
+      warnings.push(`⚠️ 消息延迟过高: P99=${latest.messageMetrics.p99Latency}ms`)
+    }
+
+    // Error rate
+    if (latest.messageMetrics.errorRate > 5) {
+      warnings.push(`⚠️ 错误率过高: ${latest.messageMetrics.errorRate}%`)
+    }
+
+    // Event-loop lag
+    if (latest.systemMetrics.eventLoopLag > 100) {
+      warnings.push(`⚠️ 事件循环延迟过高: ${latest.systemMetrics.eventLoopLag}ms`)
+    }
+
+    return warnings
+  }
+
+  /**
+   * Builds a plain-text report from the newest stored snapshot, followed by
+   * any current warnings.
+   *
+   * @returns The report text, or '暂无数据' when no snapshot has been collected.
+   */
+  getPerformanceReport(): string {
+    const latest = this.metrics[this.metrics.length - 1]
+
+    if (!latest) return '暂无数据'
+
+    const warnings = this.getPerformanceWarnings()
+
+    let report = '📈 性能报告\n'
+    report += '='.repeat(50) + '\n'
+    report += `时间: ${new Date(latest.timestamp).toLocaleString()}\n`
+    report += `内存: ${latest.memoryUsage.heapUsed}MB / ${latest.memoryUsage.heapTotal}MB\n`
+    report += `消息吞吐: ${latest.messageMetrics.messagesPerSecond}/s\n`
+    report += `平均延迟: ${latest.messageMetrics.averageLatency}ms\n`
+    report += `连接数: ${latest.connectionMetrics.totalConnections}\n`
+    report += `运行时间: ${latest.systemMetrics.uptime}s\n`
+
+    if (warnings.length > 0) {
+      report += '\n⚠️ 警告:\n'
+      for (const warning of warnings) {
+        report += `  ${warning}\n`
+      }
+    } else {
+      report += '\n✅ 系统运行正常\n'
+    }
+
+    return report
+  }
+
+  /**
+   * Stops every background timer and drops the collected data.
+   *
+   * Safe to call more than once. After this call the instance no longer
+   * samples, logs, or keeps the process alive.
+   */
+  destroy(): void {
+    for (const timer of this.timers) {
+      clearInterval(timer)
+    }
+    this.timers = []
+    this.metrics = []
+    this.messageLatencies = []
+  }
+}