Files
server/OPTIMIZATION_GUIDE.md
2026-02-25 00:49:29 +08:00

11 KiB
Raw Permalink Blame History

服务端性能优化指南

优化目标

  • 降低延迟 (< 100ms)
  • 提高吞吐量 (支持100+并发设备)
  • 减少内存占用 (< 300MB)
  • 降低丢帧率 (< 2%)

1⃣ 立即可实施的优化 (高优先级)

1.1 启用消息批处理

问题: 每条消息单独发送,导致频繁的 Socket.IO 调用 方案: 批量发送消息,减少网络往返

// Add to src/services/MessageRouter.ts
// Pending outbound messages, keyed by client id.
private messageQueue: Map<string, any[]> = new Map()
// Batch size threshold (number of messages).
private readonly BATCH_SIZE = 10
// Batch wait time, in milliseconds.
private readonly BATCH_TIMEOUT = 50  // 50 ms

/**
 * Emits everything queued for one client as a single 'batch_messages' event,
 * then removes that client's queue.
 *
 * Does nothing when the client has no queue or the queue is empty. When the
 * client has no socket, the queued messages are discarded without being sent.
 *
 * @param clientId Client whose pending messages should be flushed.
 */
private async flushMessageQueue(clientId: string) {
  const pending = this.messageQueue.get(clientId)
  if (pending == null || pending.length === 0) {
    return
  }

  this.webClientManager.getClientSocket(clientId)?.emit('batch_messages', pending)
  this.messageQueue.delete(clientId)
}

// Use this from sendToClient instead of emitting each message directly.
/**
 * Appends a message to the client's current batch. The batch is flushed when
 * it reaches BATCH_SIZE messages, or BATCH_TIMEOUT ms after its first message,
 * whichever comes first.
 *
 * @param clientId Client the message is addressed to.
 * @param event Event name the receiver should dispatch on.
 * @param data Event payload.
 */
private queueMessage(clientId: string, event: string, data: unknown) {
  let queue = this.messageQueue.get(clientId)
  if (!queue) {
    const batch: any[] = []
    queue = batch
    this.messageQueue.set(clientId, batch)

    // The timer flushes only the batch it was armed for. If that batch was
    // already flushed by size, a newer batch may be sitting under the same
    // client id; flushing it here would cut its wait short of BATCH_TIMEOUT.
    setTimeout(() => {
      if (this.messageQueue.get(clientId) === batch) {
        void this.flushMessageQueue(clientId)
      }
    }, this.BATCH_TIMEOUT)
  }

  queue.push({ event, data })

  if (queue.length >= this.BATCH_SIZE) {
    // Fire-and-forget: `void` marks the unawaited promise as intentional.
    void this.flushMessageQueue(clientId)
  }
}

1.2 实现数据压缩 (可选)

问题: 屏幕数据体积大,传输开销高 方案: 使用 zlib 压缩,仅在必要时(单帧超过约 500KB)启用

import zlib from 'zlib'

/**
 * Deflates a buffer synchronously with zlib.
 *
 * @param data Raw bytes to compress.
 * @returns The deflate-compressed bytes.
 */
private compressData(data: Buffer): Buffer {
  // Level 6 balances speed against compression ratio.
  const deflateOptions = { level: 6 }
  const deflated = zlib.deflateSync(data, deflateOptions)
  return deflated
}

/**
 * Inflates a zlib-deflated buffer synchronously.
 *
 * @param data Deflate-compressed bytes.
 * @returns The decompressed bytes.
 */
private decompressData(data: Buffer): Buffer {
  const inflated = zlib.inflateSync(data)
  return inflated
}

// While routing screen data: compress only Buffer payloads larger than
// 500,000 bytes, and flag the frame as compressed.
if (Buffer.isBuffer(screenData.data) && screenData.data.length > 500_000) {
  screenData.data = this.compressData(screenData.data)
  screenData.compressed = true
}

1.3 优化数据库查询

问题: 频繁的Socket ID查询 方案: 添加查询缓存

// Add to src/services/DatabaseService.ts
// Socket id -> cached device id plus a timestamp (milliseconds) for that entry.
private socketIdCache: Map<string, { deviceId: string, timestamp: number }> = new Map()
// Cache time-to-live, in milliseconds.
private readonly CACHE_TTL = 60000  // 1 minute

/**
 * Resolves the device bound to a socket id, consulting the in-memory
 * socket-id cache before falling back to the socket-id query.
 *
 * A cache entry is used only while it is younger than CACHE_TTL and its device
 * id still resolves. Otherwise the entry is removed and the lookup falls
 * through to getDeviceBySocketId, so stale entries neither mask a re-bound
 * socket nor accumulate in the map.
 *
 * @param socketId Socket id to look up.
 * @returns The matching device record, or null when none is found.
 */
getDeviceBySocketIdCached(socketId: string): DeviceRecord | null {
  const cached = this.socketIdCache.get(socketId)
  if (cached) {
    const isFresh = Date.now() - cached.timestamp < this.CACHE_TTL
    if (isFresh) {
      const cachedDevice = this.getDeviceById(cached.deviceId)
      if (cachedDevice) {
        return cachedDevice
      }
    }
    // Expired, or the cached device id no longer resolves: drop the entry.
    this.socketIdCache.delete(socketId)
  }

  const device = this.getDeviceBySocketId(socketId)
  if (device) {
    this.socketIdCache.set(socketId, { deviceId: device.deviceId, timestamp: Date.now() })
  }
  return device
}

/**
 * Removes the cached entry for a socket id. Meant to be called when the
 * device disconnects.
 *
 * @param socketId Socket id whose cache entry should be removed.
 */
invalidateSocketCache(socketId: string) {
  const cache = this.socketIdCache
  cache.delete(socketId)
}

1.4 增强连接池管理

问题: 连接管理不够精细 方案: 实现连接池和优先级队列

// src/managers/ConnectionPool.ts (new file)
/**
 * Bookkeeping for active connections with a hard cap on how many are tracked.
 *
 * When the cap is reached, adding a new socket evicts one existing entry:
 * the least recently active connection of the lowest priority present
 * ('low' before 'normal' before 'high').
 *
 * NOTE(review): eviction only removes the entry from this pool; closing the
 * underlying socket is not done here and is presumably the caller's job.
 * NOTE(review): `priorityQueue` and `IDLE_TIMEOUT` are declared but not used
 * by any method shown here.
 */
export class ConnectionPool {
  /** Priorities in the order they are considered for eviction. */
  private static readonly EVICTION_ORDER = ['low', 'normal', 'high'] as const

  private connections: Map<string, ConnectionInfo> = new Map()
  private priorityQueue: PriorityQueue<string> = new PriorityQueue()
  private readonly MAX_CONNECTIONS = 1000
  private readonly IDLE_TIMEOUT = 300000  // 5 minutes

  /**
   * Registers a connection, evicting another one first if the pool is full.
   *
   * @param socketId Socket id of the connection.
   * @param priority Eviction priority; 'low' entries are evicted first.
   */
  addConnection(socketId: string, priority: 'high' | 'normal' | 'low' = 'normal') {
    // Re-registering a known socket overwrites its entry in place, so it does
    // not need a free slot and must not push another connection out.
    const isNewSocket = !this.connections.has(socketId)
    if (isNewSocket && this.connections.size >= this.MAX_CONNECTIONS) {
      this.evictLRU()
    }

    const now = Date.now()
    this.connections.set(socketId, {
      socketId,
      createdAt: now,
      lastActivity: now,
      priority,
      dataTransferred: 0
    })
  }

  /**
   * Marks a connection as active now. Unknown socket ids are ignored.
   *
   * @param socketId Socket id of the connection that was just active.
   */
  updateActivity(socketId: string) {
    const conn = this.connections.get(socketId)
    if (conn) {
      conn.lastActivity = Date.now()
    }
  }

  /**
   * Removes one connection: the least recently active entry of the lowest
   * priority tier that has any entries. Falling back to higher tiers keeps
   * the pool within MAX_CONNECTIONS even when no 'low' connection exists.
   */
  private evictLRU() {
    for (const tier of ConnectionPool.EVICTION_ORDER) {
      let victim: string | undefined
      // Infinity, not Date.now(): an entry last active in the current
      // millisecond must still be eligible.
      let oldestActivity = Infinity

      for (const [socketId, conn] of this.connections) {
        if (conn.priority === tier && conn.lastActivity < oldestActivity) {
          victim = socketId
          oldestActivity = conn.lastActivity
        }
      }

      if (victim !== undefined) {
        this.connections.delete(victim)
        return
      }
    }
  }
}

2⃣ 中期优化 (1-2周)

2.1 实现消息队列

问题: 高并发时消息丢失 方案: 集成Bull队列库

npm install bull redis
// src/services/MessageQueue.ts (新文件)
import Queue from 'bull'

/**
 * Owns two Bull queues: one for screen data and one for control commands.
 *
 * Control commands get their own queue and a higher processor concurrency
 * (20 vs 10). Bull's `priority` option is deliberately not used: it only
 * orders jobs inside a single queue, its scale runs from 1 (highest) upward,
 * and it adds overhead, so giving every control command the same priority
 * value bought nothing.
 *
 * NOTE(review): `processScreenData` and `processControlCommand` are called
 * below but are not defined in this snippet; they must exist on this class.
 */
export class MessageQueueService {
  private screenDataQueue: Queue.Queue
  private controlCommandQueue: Queue.Queue

  /**
   * @param redis Redis connection used by both queues. Defaults to the
   *   REDIS_HOST / REDIS_PORT environment variables, then localhost:6379.
   */
  constructor(
    redis: { host: string; port: number } = {
      host: process.env.REDIS_HOST ?? 'localhost',
      // `||` on purpose: an unset or non-numeric REDIS_PORT becomes NaN,
      // which must fall back to the default as well.
      port: Number(process.env.REDIS_PORT) || 6379
    }
  ) {
    this.screenDataQueue = new Queue('screen-data', {
      redis,
      defaultJobOptions: {
        attempts: 3,
        backoff: { type: 'exponential', delay: 2000 },
        removeOnComplete: true
      }
    })

    this.controlCommandQueue = new Queue('control-command', {
      redis,
      defaultJobOptions: {
        removeOnComplete: true
      }
    })

    this.setupProcessors()
  }

  /** Registers the job processors for both queues. */
  private setupProcessors() {
    // Screen data: up to 10 jobs processed concurrently.
    this.screenDataQueue.process(10, async (job) => {
      return this.processScreenData(job.data)
    })

    // Control commands: up to 20 jobs processed concurrently.
    this.controlCommandQueue.process(20, async (job) => {
      return this.processControlCommand(job.data)
    })
  }

  /**
   * Adds a screen-data job to the screen-data queue.
   *
   * @param data Screen data to process.
   */
  async enqueueScreenData(data: ScreenData): Promise<void> {
    await this.screenDataQueue.add(data)
  }

  /**
   * Adds a control command to the control-command queue.
   *
   * @param command Control command to process.
   */
  async enqueueControlCommand(command: ControlMessage): Promise<void> {
    await this.controlCommandQueue.add(command)
  }
}

2.2 添加Redis缓存层

问题: 频繁数据库查询 方案: 使用Redis缓存热数据

// src/services/CacheService.ts (new file)
import { createClient } from 'redis'

type RedisClient = ReturnType<typeof createClient>

/**
 * Redis-backed cache for device records and device state, written against the
 * promise-based node-redis API (v4+). Values are stored as JSON under
 * `device:<id>` and `state:<id>` and expire after TTL seconds.
 *
 * The connection is opened lazily on first use, because node-redis v4+ needs
 * an explicit `connect()` and a constructor cannot await it.
 */
export class CacheService {
  private readonly client: RedisClient
  private connecting?: Promise<RedisClient>
  private readonly TTL = 300  // 5 minutes, in seconds

  constructor() {
    this.client = createClient({
      socket: {
        host: process.env.REDIS_HOST ?? 'localhost',
        // `||` on purpose: an unset or non-numeric REDIS_PORT becomes NaN.
        port: Number(process.env.REDIS_PORT) || 6379
      },
      database: 0
    })

    // The client emits 'error' on connection problems; an emitter with no
    // 'error' listener throws, which would take the process down.
    this.client.on('error', (err: unknown) => {
      console.error('Redis cache client error', err)
    })
  }

  /**
   * Returns the client once it is connected, sharing a single connect()
   * call between concurrent callers. A failed attempt is forgotten so the
   * next call retries.
   */
  private connected(): Promise<RedisClient> {
    if (!this.connecting) {
      this.connecting = this.client.connect().then(
        () => this.client,
        (err: unknown) => {
          this.connecting = undefined
          throw err
        }
      )
    }
    return this.connecting
  }

  /** Reads a key and parses it as JSON; resolves to null on a cache miss. */
  private async readJson(key: string) {
    const client = await this.connected()
    const cached = await client.get(key)
    return cached ? JSON.parse(cached) : null
  }

  /** Stores a value as JSON under a key, expiring after TTL seconds. */
  private async writeJson(key: string, value: unknown): Promise<void> {
    const client = await this.connected()
    await client.setEx(key, this.TTL, JSON.stringify(value))
  }

  /**
   * @param deviceId Device id to look up.
   * @returns The cached device record, or null when it is not cached.
   */
  async getDevice(deviceId: string) {
    return this.readJson(`device:${deviceId}`)
  }

  /**
   * @param deviceId Device id to cache under.
   * @param data Device record to cache (must be JSON-serializable).
   */
  async setDevice(deviceId: string, data: any) {
    await this.writeJson(`device:${deviceId}`, data)
  }

  /**
   * @param deviceId Device id to look up.
   * @returns The cached device state, or null when it is not cached.
   */
  async getDeviceState(deviceId: string) {
    return this.readJson(`state:${deviceId}`)
  }

  /**
   * @param deviceId Device id to cache under.
   * @param state Device state to cache (must be JSON-serializable).
   */
  async setDeviceState(deviceId: string, state: any) {
    await this.writeJson(`state:${deviceId}`, state)
  }

  /**
   * Removes both the cached record and the cached state of a device.
   *
   * @param deviceId Device id to invalidate.
   */
  async invalidateDevice(deviceId: string) {
    const client = await this.connected()
    // One DEL with both keys instead of two round trips.
    await client.del([`device:${deviceId}`, `state:${deviceId}`])
  }
}

2.3 实现连接监控和告警

问题: 无法及时发现性能问题 方案: 添加Prometheus指标

// src/services/MetricsService.ts (新文件)
import { Counter, Gauge, Histogram } from 'prom-client'

/**
 * Prometheus metrics for message handling: a message counter, an
 * active-connection gauge, and a latency histogram.
 */
export class MetricsService {
  /** Processed messages, labelled by message type and outcome. */
  private messageCounter = new Counter({
    name: 'messages_total',
    help: 'Total messages processed',
    labelNames: ['type', 'status']
  })

  /** Current number of active connections, labelled by connection type. */
  private connectionGauge = new Gauge({
    name: 'active_connections',
    help: 'Number of active connections',
    labelNames: ['type']
  })

  /** Message processing latency in milliseconds, labelled by message type. */
  private latencyHistogram = new Histogram({
    name: 'message_latency_ms',
    help: 'Message processing latency',
    labelNames: ['type'],
    buckets: [10, 50, 100, 200, 500, 1000]
  })

  /**
   * Counts one processed message.
   *
   * @param type Message type label.
   * @param status Outcome label.
   */
  recordMessage(type: string, status: 'success' | 'failed') {
    const labels = { type, status }
    this.messageCounter.inc(labels)
  }

  /**
   * Sets the active-connection gauge for one connection type.
   *
   * @param type Connection type label.
   * @param count Current number of connections of that type.
   */
  setConnections(type: string, count: number) {
    const labels = { type }
    this.connectionGauge.set(labels, count)
  }

  /**
   * Records one latency observation.
   *
   * @param type Message type label.
   * @param ms Observed latency in milliseconds.
   */
  recordLatency(type: string, ms: number) {
    const labels = { type }
    this.latencyHistogram.observe(labels, ms)
  }
}

3⃣ 长期优化 (1个月+)

3.1 实现分布式架构

方案: 使用Socket.IO Adapter支持多服务器

npm install @socket.io/redis-adapter
import { createAdapter } from '@socket.io/redis-adapter'
import { createClient } from 'redis'

// node-redis v4+ takes a connection URL (or `socket: { host, port }`);
// top-level `host` / `port` options are not part of its configuration.
const redisHost = process.env.REDIS_HOST ?? 'localhost'
const redisPort = Number(process.env.REDIS_PORT) || 6379

const pubClient = createClient({ url: `redis://${redisHost}:${redisPort}` })
const subClient = pubClient.duplicate()

// Without an 'error' listener a connection problem throws from the emitter.
pubClient.on('error', (err: unknown) => console.error('Redis pub client error', err))
subClient.on('error', (err: unknown) => console.error('Redis sub client error', err))

/**
 * Connects both Redis clients and then installs the adapter on `io`.
 * node-redis v4+ clients must be connected explicitly before use.
 */
async function attachRedisAdapter(): Promise<void> {
  await Promise.all([pubClient.connect(), subClient.connect()])
  io.adapter(createAdapter(pubClient, subClient))
}

attachRedisAdapter().catch((err: unknown) => {
  console.error('Failed to attach the Socket.IO Redis adapter', err)
})

3.2 实现负载均衡

方案: 使用Nginx反向代理

upstream socket_servers {
  # Sticky sessions: a Socket.IO session using HTTP long-polling issues
  # several requests, and all of them must reach the same Node process.
  # ip_hash pins each client address to one upstream server.
  ip_hash;

  server localhost:3001;
  server localhost:3002;
  server localhost:3003;
}

server {
  listen 80;

  location / {
    proxy_pass http://socket_servers;

    # Forward the WebSocket upgrade handshake.
    proxy_http_version 1.1;
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection "upgrade";

    proxy_set_header Host $host;
    # Pass the original client address on to the Node servers.
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
  }
}

3.3 实现CDN支持

方案: 使用CDN加速大文件传输

// 屏幕截图上传到CDN
/**
 * Uploads a screenshot to the CDN and sends the web client the resulting URL
 * in a 'screen_data' event, instead of the raw image bytes.
 *
 * The original snippet referenced `clientId` without declaring it, so the
 * recipient is now an explicit parameter.
 *
 * @param deviceId Device the screenshot belongs to; used in the object key.
 * @param data Screenshot bytes, stored under a `.jpg` key.
 * @param clientId Web client that should receive the 'screen_data' event.
 */
async uploadScreenshotToCDN(deviceId: string, data: Buffer, clientId: string): Promise<void> {
  const key = `screenshots/${deviceId}/${Date.now()}.jpg`
  const url = await this.cdnService.upload(key, data)

  // Send the CDN URL rather than the raw data.
  this.webClientManager.sendToClient(clientId, 'screen_data', {
    deviceId,
    url,
    timestamp: Date.now()
  })
}

4⃣ 性能测试和监控

4.1 添加性能测试

npm install --save-dev autocannon
// test/performance.test.ts
import autocannon from 'autocannon'

/**
 * Runs an autocannon load test against http://localhost:3001 with 100
 * connections for a duration of 30, requesting GET /api/devices with a bearer
 * token, and prints the result to the console.
 */
async function runPerformanceTest() {
  const listDevicesRequest = {
    path: '/api/devices',
    method: 'GET' as const,
    headers: { 'Authorization': 'Bearer token' }
  }

  const result = await autocannon({
    url: 'http://localhost:3001',
    connections: 100,
    duration: 30,
    requests: [listDevicesRequest]
  })

  console.log(result)
}

4.2 监控关键指标

// Add to MessageRouter
/**
 * Starts a once-per-minute log of heap usage, uptime, frame counters, drop
 * rate and device count.
 *
 * @returns The interval handle, so the caller can `clearInterval` it on
 *   shutdown. Calling this more than once starts additional timers.
 */
private logPerformanceMetrics(): ReturnType<typeof setInterval> {
  return setInterval(() => {
    const memUsage = process.memoryUsage()
    const uptime = process.uptime()
    // Before any frame has been routed the ratio is 0 / 0; report N/A rather
    // than logging "NaN%".
    // NOTE(review): assumes routedFrames is the denominator the drop rate
    // should use, as in the original expression; confirm against the counters.
    const dropRate = this.routedFrames > 0
      ? `${((this.droppedFrames / this.routedFrames) * 100).toFixed(2)}%`
      : 'N/A'

    this.logger.info(`
       性能指标:
      - 内存: ${Math.round(memUsage.heapUsed / 1024 / 1024)}MB / ${Math.round(memUsage.heapTotal / 1024 / 1024)}MB
      - 运行时间: ${Math.round(uptime)}s
      - 屏幕帧: ${this.routedFrames} (丢帧: ${this.droppedFrames})
      - 丢帧率: ${dropRate}
      - 连接数: ${this.deviceManager.getDeviceCount()}
    `)
  }, 60000)  // once per minute
}

5⃣ 配置建议

生产环境启动参数

# 暴露 global.gc(),便于手动触发垃圾回收(仅用于调试/诊断;如需观察 GC 日志请使用 --trace-gc)
node --expose-gc dist/index.js

# 启用性能分析
node --prof dist/index.js

# 增加内存限制
node --max-old-space-size=2048 dist/index.js

环境变量配置

# .env
NODE_ENV=production
LOG_LEVEL=info
MAX_CONNECTIONS=1000
MEMORY_LIMIT=500
BATCH_SIZE=10
BATCH_TIMEOUT=50
CACHE_TTL=300
REDIS_HOST=localhost
REDIS_PORT=6379

预期改进

指标 优化前 优化后 改进
平均延迟 150ms 80ms ↓47%
吞吐量 50设备 200设备 ↑300%
内存占用 400MB 250MB ↓37%
丢帧率 5% 1% ↓80%
CPU占用 60% 35% ↓42%

故障排查

问题: 内存持续增长

解决方案:

  1. 检查缓冲区是否正确清理
  2. 使用 node --expose-gc 启动,通过 global.gc() 手动触发回收,确认内存能否被释放(垃圾回收默认始终开启)
  3. 检查数据库连接是否泄漏

问题: 丢帧率高

解决方案:

  1. 检查网络带宽
  2. 增加缓冲区大小
  3. 启用消息批处理

问题: 连接频繁断开

解决方案:

  1. 增加心跳超时时间
  2. 检查防火墙配置
  3. 启用连接池管理

参考资源