fix: 修复设备注册延迟导致中继报离线问题

- 服务端新连接8秒后检测未注册Socket,主动发送ping_for_registration
- 断开验证延迟从1.5秒增加到5秒,连接测试(CONNECTION_TEST)超时从5秒增加到15秒,容忍polling传输抖动
- verifyDeviceDisconnection增加近期活动检测(Socket已不存在但10秒内有活动时,延迟10秒后再做二次确认),避免误清理
- performSecondaryDeviceCheck增加设备重连检测(socketId变化跳过清理),不活跃阈值从60秒增加到90秒
- 一致性检查不活跃阈值从2分钟增加到3分钟,最小连接时长从1分钟增加到2分钟,二次确认延迟从3秒增加到5秒
- 清理服务端日志中的emoji符号
This commit is contained in:
wdvipa
2026-02-15 17:42:44 +08:00
parent af927dd9b4
commit 10028a0e2e
2 changed files with 86 additions and 87 deletions

Binary file not shown.

View File

@@ -1176,20 +1176,32 @@ class RemoteControlServer {
const hasAuth = !!socket.handshake?.auth?.token
this.logger.info(`[Conn] New connection: ${socket.id} (transport: ${transport}, ip: ${remoteAddr}, hasAuth: ${hasAuth})`)
// 🔧 移除强制认证检查 - 让设备端可以正常连接认证只在web客户端注册时进行
// 🔧 增强连接监控,帮助诊断误断开问题
// 增强连接监控,帮助诊断误断开问题
socket.conn.on('upgrade', () => {
this.logger.info(`连接升级: ${socket.id} -> ${socket.conn.transport.name}`)
this.logger.info(`[Conn] Transport upgrade: ${socket.id} -> ${socket.conn.transport.name}`)
})
socket.conn.on('upgradeError', (error: any) => {
this.logger.warn(`连接升级失败: ${socket.id}`, error)
this.logger.warn(`[Conn] Transport upgrade failed: ${socket.id}`, error)
})
socket.on('disconnecting', (reason: string) => {
this.logger.warn(`⚠️ 连接即将断开: ${socket.id}, 原因: ${reason}`)
this.logger.warn(`[Conn] Disconnecting: ${socket.id}, reason: ${reason}`)
})
// 主动检测未注册的连接,确保设备不会因注册丢失而显示离线
const REGISTRATION_CHECK_DELAY_MS = 8000
setTimeout(() => {
if (!socket.clientType && socket.connected) {
this.logger.warn(`[Conn] Socket ${socket.id} connected ${REGISTRATION_CHECK_DELAY_MS}ms ago but not registered, sending ping_for_registration`)
socket.emit('ping_for_registration', {
requireReregistration: true,
reason: 'unregistered_connection_detected',
serverTime: new Date().toISOString()
})
}
}, REGISTRATION_CHECK_DELAY_MS)
// 🔧 设备注册 - 使用队列处理
socket.on('device_register', (data: any) => {
this.queueDeviceRegistration(socket, data)
@@ -1765,13 +1777,13 @@ class RemoteControlServer {
if (socket.clientType === 'device' && socket.deviceId) {
const deviceId = socket.deviceId
this.logger.warn(`🔍 设备Socket断开: ${deviceId} (${socket.id})`)
this.logger.warn(`[Disconnect] Device socket disconnected: ${deviceId} (${socket.id})`)
// 🔧 优化:短延迟验证断开状态,平衡误判防护和真实断开检测速度
// 因为Socket.IO的disconnect事件可能因为网络抖动等原因被误触发但真正断开应该快速处理
// Delay verification to tolerate polling transport jitter and reconnect race
// Polling transport may briefly drop the socket during upgrade or network hiccup
setTimeout(() => {
this.verifyDeviceDisconnection(deviceId, socket.id)
}, 1500) // 1.5秒后验证,更快响应真实断开
}, 5000) // 5s grace period for reconnect
} else if (socket.clientType === 'web' && socket.clientId) {
// 🔧 优化Web客户端断开处理
@@ -1994,57 +2006,45 @@ class RemoteControlServer {
private checkAndFixInconsistentStates(): void {
try {
const memoryDevices = this.deviceManager.getAllDevices()
let fixedCount = 0
const currentTime = Date.now()
this.logger.debug(`🔍 开始状态一致性检查,检查 ${memoryDevices.length} 个设备`)
this.logger.debug(`[Consistency] Checking ${memoryDevices.length} devices`)
for (const device of memoryDevices) {
const socket = this.io.sockets.sockets.get(device.socketId)
// 🔧 修复:增加多重验证条件,避免误判
const socketExists = !!socket
const socketConnected = socket?.connected || false
const timeSinceLastSeen = currentTime - device.lastSeen.getTime()
const isRecentlyActive = timeSinceLastSeen < 180000 // 3分钟内有活动
const timeSinceConnected = currentTime - device.connectedAt.getTime()
this.logger.debug(`📊 设备 ${device.id} 状态检查: socket存在=${socketExists}, 连接=${socketConnected}, 最后活跃=${Math.round(timeSinceLastSeen / 1000)}秒前`)
this.logger.debug(`[Consistency] Device ${device.id}: socket=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`)
// Only remove if:
// 1. Socket completely gone (not just disconnected)
// 2. No activity for 3 minutes
// 3. Connected for more than 2 minutes (avoid race with fresh connections)
const INACTIVE_THRESHOLD_MS = 180000 // 3 minutes
const MIN_CONNECTION_AGE_MS = 120000 // 2 minutes
// 🔧 平衡的断开判断逻辑:快速检测真实断开,避免心跳期间误判
// 1. Socket必须完全不存在不检查connected状态因为心跳期间可能瞬时为false
// 2. 且设备超过2分钟无活动适中的容错时间足够检测真实断开
// 3. 且不是刚连接的设备(避免恢复期间的竞态条件)
const shouldRemove = !socketExists &&
timeSinceLastSeen > 120000 && // 2分钟无活动才考虑断开
(currentTime - device.connectedAt.getTime()) > 60000 // 连接超过1分钟才检查
timeSinceLastSeen > INACTIVE_THRESHOLD_MS &&
timeSinceConnected > MIN_CONNECTION_AGE_MS
if (shouldRemove) {
this.logger.warn(`⚠️ 确认设备真正断开: ${device.id} (${device.name})`)
this.logger.warn(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`)
this.logger.warn(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`)
this.logger.warn(` - 连接时长: ${Math.round((currentTime - device.connectedAt.getTime()) / 1000)}`)
this.logger.warn(`[Consistency] Device ${device.id} (${device.name}) confirmed disconnected`)
this.logger.warn(` socketExists=${socketExists}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s, connAge=${Math.round(timeSinceConnected / 1000)}s`)
// 🔧 优化:适中的二次确认延迟,快速清理真正断开的设备
// Secondary check after 5s delay
setTimeout(() => {
this.performSecondaryDeviceCheck(device.id, device.socketId)
}, 3000) // 3秒后二次确认
} else {
// 设备状态正常或在容错范围内
if (!socketExists || !socketConnected) {
this.logger.debug(`⏸️ 设备 ${device.id} Socket状态异常但在容错范围内 (最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前)`)
}
}, 5000)
} else if (!socketExists || !socketConnected) {
this.logger.debug(`[Consistency] Device ${device.id} socket abnormal but within tolerance (lastSeen: ${Math.round(timeSinceLastSeen / 1000)}s ago)`)
}
}
if (fixedCount > 0) {
this.logger.info(`🔧 状态一致性检查完成,修复了 ${fixedCount} 个不一致状态`)
} else {
this.logger.debug(`✅ 状态一致性检查完成,所有设备状态正常`)
}
} catch (error) {
this.logger.error('状态一致性检查失败:', error)
this.logger.error('[Consistency] Check failed:', error)
}
}
@@ -2061,13 +2061,13 @@ class RemoteControlServer {
try {
const device = this.deviceManager.getDevice(deviceId)
if (!device) {
this.logger.debug(`📋 验证断开时设备 ${deviceId} 已不在内存中,可能已被其他逻辑清理`)
this.logger.debug(`[Verify] Device ${deviceId} already removed from memory, skip`)
return
}
// 检查设备是否已经重新连接新的Socket ID
// Device reconnected with a new socket - skip cleanup
if (device.socketId !== socketId) {
this.logger.info(`✅ 设备 ${deviceId} 已重新连接新Socket: ${device.socketId},跳过断开处理`)
this.logger.info(`[Verify] Device ${deviceId} reconnected with new socket: ${device.socketId}, skip`)
return
}
@@ -2075,32 +2075,36 @@ class RemoteControlServer {
const currentTime = Date.now()
const timeSinceLastSeen = currentTime - device.lastSeen.getTime()
// 🔧 优化:区分不同断开场景的检查条件
const socketExists = !!socket
const socketConnected = socket?.connected || false
const hasRecentActivity = timeSinceLastSeen < 5000 // 5秒内有活动
const hasRecentActivity = timeSinceLastSeen < 10000 // 10s recent activity window
this.logger.info(`🔍 验证设备 ${deviceId} 断开状态:`)
this.logger.info(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`)
this.logger.info(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`)
this.logger.info(` - 近期活跃: ${hasRecentActivity}`)
this.logger.info(`[Verify] Device ${deviceId}: socketExists=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`)
// 🔧 关键优化如果Socket不存在很可能是真正的断开
// Socket completely gone - confirm real disconnect
if (!socketExists) {
this.logger.warn(`❌ Socket完全不存在确认设备真实断开: ${deviceId}`)
// Double check: if device had recent activity, give more time
if (hasRecentActivity) {
this.logger.info(`[Verify] Socket gone but device ${deviceId} had recent activity, defer cleanup by 10s`)
setTimeout(() => {
this.performSecondaryDeviceCheck(deviceId, socketId)
}, 10000)
return
}
this.logger.warn(`[Verify] Socket gone and no recent activity, cleanup device: ${deviceId}`)
this.executeDeviceCleanup(deviceId, device)
return
}
// 🔧 如果Socket存在但未连接,且无近期活动,尝试主动测试连接
// Socket exists but not connected, and no recent activity - test connection
if (!socketConnected && !hasRecentActivity) {
this.logger.warn(`🔍 Socket存在但未连接主动测试设备连接: ${deviceId}`)
this.logger.warn(`[Verify] Socket exists but not connected, testing device: ${deviceId}`)
this.testDeviceConnection(deviceId, socketId, device)
return
}
// 设备状态正常确保Web端知道设备在线
this.logger.info(`✅ 验证结果:设备 ${deviceId} 仍然在线disconnect事件是误报`)
// Device still alive - broadcast online status
this.logger.info(`[Verify] Device ${deviceId} still online, disconnect was false alarm`)
this.webClientManager.broadcastToAll('device_status_update', {
deviceId: device.id,
status: {
@@ -2112,7 +2116,7 @@ class RemoteControlServer {
})
} catch (error) {
this.logger.error(`验证设备断开失败 (${deviceId}):`, error)
this.logger.error(`[Verify] Failed for device ${deviceId}:`, error)
}
}
@@ -2122,39 +2126,35 @@ class RemoteControlServer {
private testDeviceConnection(deviceId: string, socketId: string, device: any): void {
const socket = this.io.sockets.sockets.get(socketId)
if (!socket) {
this.logger.warn(`❌ 测试连接时Socket已不存在: ${deviceId}`)
this.logger.warn(`[ConnTest] Socket gone for device ${deviceId}`)
this.executeDeviceCleanup(deviceId, device)
return
}
this.logger.info(`📡 向设备 ${deviceId} 发送连接测试`)
this.logger.info(`[ConnTest] Testing device ${deviceId}`)
// 设置响应超时
let responded = false
const CONNECTION_TEST_TIMEOUT_MS = 15000 // 15s timeout for polling transport
const timeout = setTimeout(() => {
if (!responded) {
this.logger.warn(`⏰ 设备 ${deviceId} 连接测试超时,确认断开`)
this.logger.warn(`[ConnTest] Device ${deviceId} timed out after ${CONNECTION_TEST_TIMEOUT_MS}ms`)
this.executeDeviceCleanup(deviceId, device)
}
}, 5000) // 5秒超时
}, CONNECTION_TEST_TIMEOUT_MS)
// 发送测试ping
try {
socket.emit('CONNECTION_TEST', {
timestamp: Date.now(),
testId: `verify_${Date.now()}`
})
// 监听一次性响应
const responseHandler = (data: any) => {
responded = true
clearTimeout(timeout)
this.logger.info(`✅ 设备 ${deviceId} 连接测试成功,设备仍在线`)
this.logger.info(`[ConnTest] Device ${deviceId} responded, still online`)
// 更新设备活跃时间
device.lastSeen = new Date()
// 确保Web端知道设备在线
this.webClientManager.broadcastToAll('device_status_update', {
deviceId: device.id,
status: {
@@ -2165,7 +2165,6 @@ class RemoteControlServer {
}
})
// 清理监听器
socket.off('CONNECTION_TEST_RESPONSE', responseHandler)
}
@@ -2174,7 +2173,7 @@ class RemoteControlServer {
} catch (error) {
responded = true
clearTimeout(timeout)
this.logger.error(`❌ 发送连接测试失败: ${deviceId}`, error)
this.logger.error(`[ConnTest] Failed to send test to device ${deviceId}:`, error)
this.executeDeviceCleanup(deviceId, device)
}
}
@@ -2183,28 +2182,26 @@ class RemoteControlServer {
* 🆕 执行设备清理逻辑
*/
private executeDeviceCleanup(deviceId: string, device: any): void {
this.logger.warn(`🧹 执行设备清理: ${deviceId} (${device.name})`)
this.logger.warn(`[Cleanup] Removing device: ${deviceId} (${device.name})`)
// 释放控制权
// Release control if held
const controllerId = this.webClientManager.getDeviceController(deviceId)
if (controllerId) {
this.logger.info(`🔓 设备断开,自动释放控制权: ${deviceId} (控制者: ${controllerId})`)
this.logger.info(`[Cleanup] Releasing control for device ${deviceId} (controller: ${controllerId})`)
this.webClientManager.releaseDeviceControl(deviceId)
// 通知控制的Web客户端设备已断开
this.webClientManager.sendToClient(controllerId, 'device_control_lost', {
deviceId: deviceId,
reason: 'device_disconnected',
message: '设备已断开连接'
message: 'Device disconnected'
})
}
// 清理设备
this.deviceManager.removeDevice(deviceId)
this.databaseService.setDeviceOffline(deviceId)
this.webClientManager.broadcastToAll('device_disconnected', deviceId)
this.logger.info(`✅ 已清理断开的设备: ${device.name} (${deviceId})`)
this.logger.info(`[Cleanup] Device removed: ${device.name} (${deviceId})`)
}
/**
@@ -2214,30 +2211,32 @@ class RemoteControlServer {
try {
const device = this.deviceManager.getDevice(deviceId)
if (!device) {
this.logger.debug(`📋 二次检查时设备 ${deviceId} 已不在内存中,跳过`)
this.logger.debug(`[SecondaryCheck] Device ${deviceId} already removed, skip`)
return
}
const socket = this.io.sockets.sockets.get(socketId)
// Device reconnected with new socket
if (device.socketId !== socketId) {
this.logger.info(`[SecondaryCheck] Device ${deviceId} reconnected with new socket ${device.socketId}, skip`)
return
}
const socket = this.io.sockets.sockets.get(device.socketId)
const currentTime = Date.now()
const timeSinceLastSeen = currentTime - device.lastSeen.getTime()
// 🔧 优化二次检查条件更合理60秒无活动就考虑断开
const socketExists = !!socket
const socketConnected = socket?.connected || false
const isInactive = timeSinceLastSeen > 60000 // 1分钟无活动
const INACTIVE_THRESHOLD_MS = 90000 // 90s no activity
this.logger.info(`🔍 二次确认设备 ${deviceId} 状态:`)
this.logger.info(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`)
this.logger.info(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`)
this.logger.info(`[SecondaryCheck] Device ${deviceId}: socket=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`)
if (!socketExists || (!socketConnected && isInactive)) {
this.logger.warn(`❌ 二次确认:设备 ${deviceId} 确实已断开,执行清理`)
if (!socketExists || (!socketConnected && timeSinceLastSeen > INACTIVE_THRESHOLD_MS)) {
this.logger.warn(`[SecondaryCheck] Device ${deviceId} confirmed disconnected, cleanup`)
this.executeDeviceCleanup(deviceId, device)
} else {
this.logger.info(`✅ 二次确认:设备 ${deviceId} 状态正常,保持连接`)
this.logger.info(`[SecondaryCheck] Device ${deviceId} still alive`)
// 设备状态恢复正常确保Web端知道设备在线
if (socketExists && socketConnected) {
this.webClientManager.broadcastToAll('device_status_update', {
deviceId: device.id,
@@ -2252,7 +2251,7 @@ class RemoteControlServer {
}
} catch (error) {
this.logger.error(`二次设备检查失败 (${deviceId}):`, error)
this.logger.error(`[SecondaryCheck] Failed for device ${deviceId}:`, error)
}
}