11 KiB
11 KiB
服务端性能优化指南
优化目标
- 降低延迟 (< 100ms)
- 提高吞吐量 (支持100+并发设备)
- 减少内存占用 (< 300MB)
- 降低丢帧率 (< 2%)
1⃣ 立即可实施的优化 (高优先级)
1.1 启用消息批处理
问题: 每条消息单独发送,导致频繁的 Socket.IO 调用
方案: 批量发送消息,减少网络往返
// src/services/MessageRouter.ts 中添加
/** Messages waiting to be sent, keyed by client ID; each entry becomes one batch. */
private messageQueue: Map<string, { event: string; data: any }[]> = new Map()
/** Pending flush timer per client, kept so a size-triggered flush can cancel it. */
private flushTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
/** A batch is flushed immediately once it holds this many messages. */
private readonly BATCH_SIZE = 10
/** Longest time, in milliseconds, a queued message waits before its batch is flushed. */
private readonly BATCH_TIMEOUT = 50 // 50ms

/**
 * Sends everything queued for `clientId` as a single `batch_messages` event
 * and resets that client's queue and flush timer.
 *
 * If the client has no socket, the queued messages are discarded.
 *
 * @param clientId Client whose queue should be flushed.
 */
private async flushMessageQueue(clientId: string) {
  // Cancel the pending timer first: otherwise a flush triggered by BATCH_SIZE
  // leaves a stale timer behind that cuts the next batch short.
  const timer = this.flushTimers.get(clientId)
  if (timer !== undefined) {
    clearTimeout(timer)
    this.flushTimers.delete(clientId)
  }

  const messages = this.messageQueue.get(clientId)
  this.messageQueue.delete(clientId)
  if (!messages || messages.length === 0) return

  const socket = this.webClientManager.getClientSocket(clientId)
  if (socket) {
    socket.emit('batch_messages', messages)
  }
}

// Modify sendToClient so that it routes outgoing messages through this method.
/**
 * Queues one message for `clientId`. The batch is sent when it reaches
 * BATCH_SIZE messages or BATCH_TIMEOUT milliseconds after its first message,
 * whichever happens first.
 *
 * @param clientId Target client.
 * @param event Event name to deliver inside the batch.
 * @param data Payload for the event.
 */
private queueMessage(clientId: string, event: string, data: any) {
  let queue = this.messageQueue.get(clientId)
  if (!queue) {
    // First message of a new batch: start the timeout that bounds its latency.
    queue = []
    this.messageQueue.set(clientId, queue)
    this.flushTimers.set(
      clientId,
      setTimeout(() => void this.flushMessageQueue(clientId), this.BATCH_TIMEOUT)
    )
  }

  queue.push({ event, data })

  if (queue.length >= this.BATCH_SIZE) {
    void this.flushMessageQueue(clientId)
  }
}
1.2 实现数据压缩 (可选)
问题: 大屏幕数据传输量大
方案: 使用 zlib 压缩,仅在必要时启用
import zlib from 'zlib'
/**
 * Deflates a buffer synchronously with zlib.
 *
 * @param data Raw bytes to compress.
 * @returns The deflated bytes.
 */
private compressData(data: Buffer): Buffer {
  // Level 6 trades compression ratio against speed.
  const deflateOptions = { level: 6 }
  return zlib.deflateSync(data, deflateOptions)
}
/**
 * Inflates a zlib-deflated buffer synchronously.
 *
 * @param data Deflated bytes.
 * @returns The inflated bytes.
 */
private decompressData(data: Buffer): Buffer {
  const inflated = zlib.inflateSync(data)
  return inflated
}
// While routing screen data: compress only Buffer payloads larger than
// 500000 bytes and flag the frame as compressed.
const isLargeBuffer =
  screenData.data instanceof Buffer && screenData.data.length > 500000
if (isLargeBuffer) {
  screenData.data = this.compressData(screenData.data)
  screenData.compressed = true
}
1.3 优化数据库查询
问题: 频繁的 Socket ID 查询
方案: 添加查询缓存
// src/services/DatabaseService.ts 中添加
// Maps a socket ID to the device ID it resolved to, together with the
// Date.now() timestamp (milliseconds) at which the entry was stored.
private socketIdCache: Map<string, { deviceId: string, timestamp: number }> = new Map()
// Cache entries older than this many milliseconds are no longer used.
private readonly CACHE_TTL = 60000 // 1 minute
/**
 * Resolves the device for a socket ID, using the socket-ID → device-ID cache
 * to skip the socket-ID lookup while the cached entry is still fresh.
 *
 * A fresh entry is resolved through `getDeviceById`. Otherwise the device is
 * looked up with `getDeviceBySocketId` and, when found, cached for CACHE_TTL
 * milliseconds.
 *
 * @param socketId Socket ID to resolve.
 * @returns The device record, or `null` when no device is found.
 */
getDeviceBySocketIdCached(socketId: string): DeviceRecord | null {
  const cached = this.socketIdCache.get(socketId)
  if (cached) {
    if (Date.now() - cached.timestamp < this.CACHE_TTL) {
      return this.getDeviceById(cached.deviceId)
    }
    // Expired: remove it now so entries for sockets that are never explicitly
    // invalidated do not accumulate in the map.
    this.socketIdCache.delete(socketId)
  }

  const device = this.getDeviceBySocketId(socketId)
  if (device) {
    this.socketIdCache.set(socketId, { deviceId: device.deviceId, timestamp: Date.now() })
  }
  return device
}
// Clears the cached entry for a socket ID; call this when the device disconnects.
// Deleting an ID that is not cached is a no-op.
invalidateSocketCache(socketId: string) {
this.socketIdCache.delete(socketId)
}
1.4 增强连接池管理
问题: 连接管理不够精细
方案: 实现连接池和优先级队列
// src/managers/ConnectionPool.ts (新文件)
/**
 * Registry of live socket connections with a hard cap on how many are kept.
 *
 * When the cap is reached, adding a new socket evicts the least recently
 * active connection, taking victims from the lowest priority tier that has
 * any connection (`low`, then `normal`, then `high`).
 */
export class ConnectionPool {
  /** Connection metadata keyed by socket ID. */
  private connections: Map<string, ConnectionInfo> = new Map()
  // NOTE(review): not read by any method in this class — confirm intended use.
  private priorityQueue: PriorityQueue<string> = new PriorityQueue()
  /** Maximum number of connections tracked at once. */
  private readonly MAX_CONNECTIONS = 1000
  // NOTE(review): declared but not enforced by any method in this class.
  private readonly IDLE_TIMEOUT = 300000 // 5 minutes

  /**
   * Registers a connection, evicting one existing connection first if the
   * pool is full. Re-adding a known socket ID replaces its entry.
   *
   * @param socketId Socket ID of the connection.
   * @param priority Eviction tier; `low` connections are evicted first.
   */
  addConnection(socketId: string, priority: 'high' | 'normal' | 'low' = 'normal') {
    // Replacing an existing entry does not grow the pool, so only a genuinely
    // new socket needs a slot freed.
    const isNew = !this.connections.has(socketId)
    if (isNew && this.connections.size >= this.MAX_CONNECTIONS) {
      this.evictLRU()
    }

    const now = Date.now()
    this.connections.set(socketId, {
      socketId,
      createdAt: now,
      lastActivity: now,
      priority,
      dataTransferred: 0
    })
  }

  /**
   * Marks a connection as active now. Unknown socket IDs are ignored.
   *
   * @param socketId Socket ID of the connection that was just used.
   */
  updateActivity(socketId: string) {
    const conn = this.connections.get(socketId)
    if (conn) {
      conn.lastActivity = Date.now()
    }
  }

  /**
   * Removes one connection: the least recently active one in the lowest
   * priority tier that is not empty. Falling back to higher tiers keeps the
   * pool within MAX_CONNECTIONS even when no `low` connection exists.
   */
  private evictLRU() {
    const evictionOrder = ['low', 'normal', 'high'] as const

    for (const tier of evictionOrder) {
      let victim: string | undefined
      // Infinity (rather than "now") lets a connection touched in the current
      // millisecond still qualify as a victim.
      let oldestActivity = Infinity

      for (const [socketId, conn] of this.connections) {
        if (conn.priority === tier && conn.lastActivity < oldestActivity) {
          victim = socketId
          oldestActivity = conn.lastActivity
        }
      }

      if (victim !== undefined) {
        this.connections.delete(victim)
        return
      }
    }
  }
}
2⃣ 中期优化 (1-2周)
2.1 实现消息队列
问题: 高并发时消息丢失
方案: 集成 Bull 队列库
npm install bull redis
// src/services/MessageQueue.ts (新文件)
import Queue from 'bull'
/**
 * Redis-backed Bull queues for screen data and control commands.
 *
 * Redis connection settings come from the REDIS_HOST / REDIS_PORT environment
 * variables and fall back to localhost:6379.
 *
 * NOTE(review): `processScreenData` and `processControlCommand` are called by
 * the processors but are not defined in this snippet; they must exist on the
 * class.
 */
export class MessageQueueService {
  /** Bull job priority for control commands; in Bull, 1 is the highest priority. */
  private static readonly CONTROL_COMMAND_PRIORITY = 1

  private screenDataQueue: Queue.Queue
  private controlCommandQueue: Queue.Queue

  constructor() {
    const redis = {
      host: process.env.REDIS_HOST || 'localhost',
      // An unset or non-numeric port falls back to the Redis default.
      port: Number.parseInt(process.env.REDIS_PORT ?? '', 10) || 6379
    }

    this.screenDataQueue = new Queue('screen-data', {
      redis,
      defaultJobOptions: {
        attempts: 3,
        backoff: { type: 'exponential', delay: 2000 },
        removeOnComplete: true
      }
    })

    this.controlCommandQueue = new Queue('control-command', {
      redis,
      defaultJobOptions: {
        // Control commands run at the highest priority.
        priority: MessageQueueService.CONTROL_COMMAND_PRIORITY,
        removeOnComplete: true
      }
    })

    this.setupProcessors()
  }

  /** Registers the job processors: 10 concurrent screen-data jobs, 20 concurrent control commands. */
  private setupProcessors() {
    this.screenDataQueue.process(10, async (job) => {
      // Process screen data
      return this.processScreenData(job.data)
    })

    this.controlCommandQueue.process(20, async (job) => {
      // Process control command
      return this.processControlCommand(job.data)
    })
  }

  /**
   * Adds a screen-data payload to the screen-data queue with no delay.
   *
   * @param data Screen data to process.
   */
  async enqueueScreenData(data: ScreenData) {
    await this.screenDataQueue.add(data, { delay: 0 })
  }

  /**
   * Adds a control command to the control-command queue at the highest priority.
   *
   * @param command Control command to process.
   */
  async enqueueControlCommand(command: ControlMessage) {
    await this.controlCommandQueue.add(command, {
      priority: MessageQueueService.CONTROL_COMMAND_PRIORITY
    })
  }
}
2.2 添加Redis缓存层
问题: 频繁的数据库查询
方案: 使用 Redis 缓存热数据
// src/services/CacheService.ts (新文件)
import redis from 'redis'
/**
 * Redis cache for device records (`device:<id>`) and device state
 * (`state:<id>`), stored as JSON with a fixed expiry.
 *
 * The client type (`redis.RedisClient`) and commands (`setex`) belong to the
 * callback-based node-redis v3 API, whose commands do not return promises.
 * Every command is therefore wrapped in a promise by `exec`; awaiting the
 * command call directly would not yield the reply.
 *
 * Connection settings and the expiry come from the REDIS_HOST, REDIS_PORT and
 * CACHE_TTL environment variables, falling back to localhost:6379 and 300 s.
 */
export class CacheService {
  private client: redis.RedisClient
  /** Expiry for cached entries, in seconds (default 5 minutes). */
  private readonly TTL = Number.parseInt(process.env.CACHE_TTL ?? '', 10) || 300

  constructor() {
    this.client = redis.createClient({
      host: process.env.REDIS_HOST || 'localhost',
      port: Number.parseInt(process.env.REDIS_PORT ?? '', 10) || 6379,
      db: 0
    })
    // An 'error' event with no listener is thrown by the emitter and would
    // bring the process down on a connection failure.
    this.client.on('error', (err: Error) => {
      console.error('[CacheService] redis error:', err.message)
    })
  }

  /** Runs one callback-style Redis command and exposes its outcome as a promise. */
  private exec<T>(command: (done: (err: Error | null, reply: T) => void) => void): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      command((err, reply) => (err ? reject(err) : resolve(reply)))
    })
  }

  /** Reads `key` and parses it as JSON; resolves to `null` when the key is absent. */
  private async readJson(key: string) {
    const cached = await this.exec<string | null>((done) => this.client.get(key, done))
    return cached ? JSON.parse(cached) : null
  }

  /** Stores `value` under `key` as JSON with the configured expiry. */
  private async writeJson(key: string, value: unknown) {
    await this.exec<string>((done) => this.client.setex(key, this.TTL, JSON.stringify(value), done))
  }

  /**
   * @param deviceId Device to look up.
   * @returns The cached device record, or `null` on a cache miss.
   */
  async getDevice(deviceId: string) {
    return this.readJson(`device:${deviceId}`)
  }

  /**
   * @param deviceId Device the record belongs to.
   * @param data JSON-serializable device record.
   */
  async setDevice(deviceId: string, data: unknown) {
    await this.writeJson(`device:${deviceId}`, data)
  }

  /**
   * @param deviceId Device to look up.
   * @returns The cached device state, or `null` on a cache miss.
   */
  async getDeviceState(deviceId: string) {
    return this.readJson(`state:${deviceId}`)
  }

  /**
   * @param deviceId Device the state belongs to.
   * @param state JSON-serializable device state.
   */
  async setDeviceState(deviceId: string, state: unknown) {
    await this.writeJson(`state:${deviceId}`, state)
  }

  /**
   * Removes both the cached record and the cached state of a device.
   *
   * @param deviceId Device whose cache entries should be dropped.
   */
  async invalidateDevice(deviceId: string) {
    // One DEL with both keys avoids a second round trip.
    await this.exec<number>((done) =>
      this.client.del(`device:${deviceId}`, `state:${deviceId}`, done)
    )
  }
}
2.3 实现连接监控和告警
问题: 无法及时发现性能问题
方案: 添加 Prometheus 指标
// src/services/MetricsService.ts (新文件)
import { Counter, Gauge, Histogram } from 'prom-client'
/**
 * Prometheus metrics for the server: a message counter, an active-connection
 * gauge and a message-latency histogram.
 */
export class MetricsService {
  /** Messages processed, labelled by message type and outcome. */
  private messageCounter = new Counter({
    labelNames: ['type', 'status'],
    help: 'Total messages processed',
    name: 'messages_total'
  })

  /** Current number of connections, labelled by connection type. */
  private connectionGauge = new Gauge({
    labelNames: ['type'],
    help: 'Number of active connections',
    name: 'active_connections'
  })

  /** Processing latency observations, labelled by message type. */
  private latencyHistogram = new Histogram({
    labelNames: ['type'],
    buckets: [10, 50, 100, 200, 500, 1000],
    help: 'Message processing latency',
    name: 'message_latency_ms'
  })

  /**
   * Counts one processed message.
   *
   * @param type Message type label.
   * @param status Outcome label.
   */
  recordMessage(type: string, status: 'success' | 'failed') {
    const labels = { type, status }
    this.messageCounter.inc(labels)
  }

  /**
   * Sets the active-connection gauge for one connection type.
   *
   * @param type Connection type label.
   * @param count Current number of connections of that type.
   */
  setConnections(type: string, count: number) {
    const labels = { type }
    this.connectionGauge.set(labels, count)
  }

  /**
   * Records one latency observation.
   *
   * @param type Message type label.
   * @param ms Observed latency value.
   */
  recordLatency(type: string, ms: number) {
    const labels = { type }
    this.latencyHistogram.observe(labels, ms)
  }
}
3⃣ 长期优化 (1个月+)
3.1 实现分布式架构
方案: 使用Socket.IO Adapter支持多服务器
npm install @socket.io/redis-adapter
import { createAdapter } from '@socket.io/redis-adapter'
import { createClient } from 'redis'
// The adapter is given two Redis connections: a publishing client and a
// duplicate of it used as the subscribing client.
const redisOptions = { host: 'localhost', port: 6379 }
const pubClient = createClient(redisOptions)
const subClient = pubClient.duplicate()
const redisAdapter = createAdapter(pubClient, subClient)
io.adapter(redisAdapter)
3.2 实现负载均衡
方案: 使用Nginx反向代理
# Socket.IO backend nodes.
upstream socket_servers {
    # Sticky sessions: route every request from one client to the same node.
    # Socket.IO's HTTP long-polling handshake spans several requests and fails
    # if they land on different nodes.
    hash $remote_addr consistent;

    server localhost:3001;
    server localhost:3002;
    server localhost:3003;
}
# Public entry point: proxies all traffic to the Socket.IO upstream.
server {
    listen 80;

    location / {
        # Keep the original Host header for the backend.
        proxy_set_header Host $host;

        # WebSocket upgrade requires HTTP/1.1 and the Upgrade/Connection headers.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";

        proxy_pass http://socket_servers;
    }
}
3.3 实现CDN支持
方案: 使用CDN加速大文件传输
/**
 * Uploads a screenshot to the CDN and, when a target client is given, sends
 * that client the CDN URL instead of the raw image data.
 *
 * The original snippet used `clientId` without declaring it; it is now an
 * optional parameter. Callers that omit it get the upload only and can use
 * the returned URL themselves.
 *
 * @param deviceId Device the screenshot belongs to; part of the object key.
 * @param data Screenshot bytes.
 * @param clientId Client to notify with a `screen_data` event, if any.
 * @returns The value resolved by `cdnService.upload` (the CDN URL).
 */
async uploadScreenshotToCDN(deviceId: string, data: Buffer, clientId?: string) {
  const key = `screenshots/${deviceId}/${Date.now()}.jpg`
  const url = await this.cdnService.upload(key, data)

  if (clientId !== undefined) {
    // Send the CDN URL rather than the raw data.
    this.webClientManager.sendToClient(clientId, 'screen_data', {
      deviceId,
      url,
      timestamp: Date.now()
    })
  }

  return url
}
4⃣ 性能测试和监控
4.1 添加性能测试
npm install --save-dev autocannon
// test/performance.test.ts
import autocannon from 'autocannon'
/**
 * Runs an autocannon load test against the local server for 30 seconds with
 * 100 connections, requesting GET /api/devices, and prints the result.
 */
async function runPerformanceTest() {
  const devicesRequest = {
    path: '/api/devices',
    method: 'GET' as const,
    headers: { 'Authorization': 'Bearer token' }
  }

  const result = await autocannon({
    url: 'http://localhost:3001',
    connections: 100,
    duration: 30,
    requests: [devicesRequest]
  })

  console.log(result)
}
4.2 监控关键指标
// 在 MessageRouter 中添加
/**
 * Starts a once-a-minute log of heap usage, uptime, routed and dropped
 * frames, drop rate and device count.
 *
 * @returns The interval handle, so the caller can stop it with `clearInterval`.
 */
private logPerformanceMetrics() {
  const toMB = (bytes: number) => Math.round(bytes / 1024 / 1024)

  return setInterval(() => {
    const memUsage = process.memoryUsage()
    const uptime = process.uptime()
    // Before any frame has been routed the ratio is 0/0, which would print
    // "NaN%"; report 0 instead.
    const dropRate =
      this.routedFrames > 0 ? (this.droppedFrames / this.routedFrames) * 100 : 0

    this.logger.info(
      [
        '',
        '性能指标:',
        `- 内存: ${toMB(memUsage.heapUsed)}MB / ${toMB(memUsage.heapTotal)}MB`,
        `- 运行时间: ${Math.round(uptime)}s`,
        `- 屏幕帧: ${this.routedFrames} (丢帧: ${this.droppedFrames})`,
        `- 丢帧率: ${dropRate.toFixed(2)}%`,
        `- 连接数: ${this.deviceManager.getDeviceCount()}`,
        ''
      ].join('\n')
    )
  }, 60000) // once per minute
}
5⃣ 配置建议
生产环境启动参数
# 暴露 global.gc() 以便手动触发垃圾回收(仅用于诊断;如需观察 GC 日志请使用 --trace-gc)
node --expose-gc dist/index.js
# 启用性能分析
node --prof dist/index.js
# 增加内存限制
node --max-old-space-size=2048 dist/index.js
环境变量配置
# .env
NODE_ENV=production
LOG_LEVEL=info
MAX_CONNECTIONS=1000
MEMORY_LIMIT=500
BATCH_SIZE=10
BATCH_TIMEOUT=50
CACHE_TTL=300
REDIS_HOST=localhost
REDIS_PORT=6379
预期改进
| 指标 | 优化前 | 优化后 | 改进 |
|---|---|---|---|
| 平均延迟 | 150ms | 80ms | ↓47% |
| 吞吐量 | 50设备 | 200设备 | ↑300% |
| 内存占用 | 400MB | 250MB | ↓37% |
| 丢帧率 | 5% | 1% | ↓80% |
| CPU占用 | 60% | 35% | ↓42% |
故障排查
问题: 内存持续增长
解决方案:
- 检查缓冲区是否正确清理
- 启用垃圾回收: node --expose-gc
- 检查数据库连接是否泄漏
问题: 丢帧率高
解决方案:
- 检查网络带宽
- 增加缓冲区大小
- 启用消息批处理
问题: 连接频繁断开
解决方案:
- 增加心跳超时时间
- 检查防火墙配置
- 启用连接池管理