fix: 修复设备注册延迟导致中继报离线问题
- 服务端新连接8秒后检测未注册Socket,主动发送ping_for_registration
- 断开验证延迟从1.5秒增加到5秒,容忍polling传输抖动
- verifyDeviceDisconnection增加近期活动检测,避免误清理
- performSecondaryDeviceCheck增加设备重连检测(socketId变化跳过清理)
- 一致性检查不活跃阈值从2分钟增加到3分钟
- 清理服务端日志中的emoji符号
This commit is contained in:
173
src/index.ts
173
src/index.ts
@@ -1176,20 +1176,32 @@ class RemoteControlServer {
|
||||
const hasAuth = !!socket.handshake?.auth?.token
|
||||
this.logger.info(`[Conn] New connection: ${socket.id} (transport: ${transport}, ip: ${remoteAddr}, hasAuth: ${hasAuth})`)
|
||||
|
||||
// 🔧 移除强制认证检查 - 让设备端可以正常连接,认证只在web客户端注册时进行
|
||||
// 🔧 增强连接监控,帮助诊断误断开问题
|
||||
// 增强连接监控,帮助诊断误断开问题
|
||||
socket.conn.on('upgrade', () => {
|
||||
this.logger.info(`连接升级: ${socket.id} -> ${socket.conn.transport.name}`)
|
||||
this.logger.info(`[Conn] Transport upgrade: ${socket.id} -> ${socket.conn.transport.name}`)
|
||||
})
|
||||
|
||||
socket.conn.on('upgradeError', (error: any) => {
|
||||
this.logger.warn(`连接升级失败: ${socket.id}`, error)
|
||||
this.logger.warn(`[Conn] Transport upgrade failed: ${socket.id}`, error)
|
||||
})
|
||||
|
||||
socket.on('disconnecting', (reason: string) => {
|
||||
this.logger.warn(`⚠️ 连接即将断开: ${socket.id}, 原因: ${reason}`)
|
||||
this.logger.warn(`[Conn] Disconnecting: ${socket.id}, reason: ${reason}`)
|
||||
})
|
||||
|
||||
// 主动检测未注册的连接,确保设备不会因注册丢失而显示离线
|
||||
const REGISTRATION_CHECK_DELAY_MS = 8000
|
||||
setTimeout(() => {
|
||||
if (!socket.clientType && socket.connected) {
|
||||
this.logger.warn(`[Conn] Socket ${socket.id} connected ${REGISTRATION_CHECK_DELAY_MS}ms ago but not registered, sending ping_for_registration`)
|
||||
socket.emit('ping_for_registration', {
|
||||
requireReregistration: true,
|
||||
reason: 'unregistered_connection_detected',
|
||||
serverTime: new Date().toISOString()
|
||||
})
|
||||
}
|
||||
}, REGISTRATION_CHECK_DELAY_MS)
|
||||
|
||||
// 🔧 设备注册 - 使用队列处理
|
||||
socket.on('device_register', (data: any) => {
|
||||
this.queueDeviceRegistration(socket, data)
|
||||
@@ -1765,13 +1777,13 @@ class RemoteControlServer {
|
||||
|
||||
if (socket.clientType === 'device' && socket.deviceId) {
|
||||
const deviceId = socket.deviceId
|
||||
this.logger.warn(`🔍 设备Socket断开: ${deviceId} (${socket.id})`)
|
||||
this.logger.warn(`[Disconnect] Device socket disconnected: ${deviceId} (${socket.id})`)
|
||||
|
||||
// 🔧 优化:短延迟验证断开状态,平衡误判防护和真实断开检测速度
|
||||
// 因为Socket.IO的disconnect事件可能因为网络抖动等原因被误触发,但真正断开应该快速处理
|
||||
// Delay verification to tolerate polling transport jitter and reconnect race
|
||||
// Polling transport may briefly drop the socket during upgrade or network hiccup
|
||||
setTimeout(() => {
|
||||
this.verifyDeviceDisconnection(deviceId, socket.id)
|
||||
}, 1500) // 1.5秒后验证,更快响应真实断开
|
||||
}, 5000) // 5s grace period for reconnect
|
||||
|
||||
} else if (socket.clientType === 'web' && socket.clientId) {
|
||||
// 🔧 优化Web客户端断开处理
|
||||
@@ -1994,57 +2006,45 @@ class RemoteControlServer {
|
||||
private checkAndFixInconsistentStates(): void {
|
||||
try {
|
||||
const memoryDevices = this.deviceManager.getAllDevices()
|
||||
let fixedCount = 0
|
||||
const currentTime = Date.now()
|
||||
|
||||
this.logger.debug(`🔍 开始状态一致性检查,检查 ${memoryDevices.length} 个设备`)
|
||||
this.logger.debug(`[Consistency] Checking ${memoryDevices.length} devices`)
|
||||
|
||||
for (const device of memoryDevices) {
|
||||
const socket = this.io.sockets.sockets.get(device.socketId)
|
||||
|
||||
// 🔧 修复:增加多重验证条件,避免误判
|
||||
const socketExists = !!socket
|
||||
const socketConnected = socket?.connected || false
|
||||
const timeSinceLastSeen = currentTime - device.lastSeen.getTime()
|
||||
const isRecentlyActive = timeSinceLastSeen < 180000 // 3分钟内有活动
|
||||
const timeSinceConnected = currentTime - device.connectedAt.getTime()
|
||||
|
||||
this.logger.debug(`📊 设备 ${device.id} 状态检查: socket存在=${socketExists}, 连接=${socketConnected}, 最后活跃=${Math.round(timeSinceLastSeen / 1000)}秒前`)
|
||||
this.logger.debug(`[Consistency] Device ${device.id}: socket=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`)
|
||||
|
||||
// Only remove if:
|
||||
// 1. Socket completely gone (not just disconnected)
|
||||
// 2. No activity for 3 minutes
|
||||
// 3. Connected for more than 2 minutes (avoid race with fresh connections)
|
||||
const INACTIVE_THRESHOLD_MS = 180000 // 3 minutes
|
||||
const MIN_CONNECTION_AGE_MS = 120000 // 2 minutes
|
||||
|
||||
// 🔧 平衡的断开判断逻辑:快速检测真实断开,避免心跳期间误判
|
||||
// 1. Socket必须完全不存在(不检查connected状态,因为心跳期间可能瞬时为false)
|
||||
// 2. 且设备超过2分钟无活动(适中的容错时间,足够检测真实断开)
|
||||
// 3. 且不是刚连接的设备(避免恢复期间的竞态条件)
|
||||
const shouldRemove = !socketExists &&
|
||||
timeSinceLastSeen > 120000 && // 2分钟无活动才考虑断开
|
||||
(currentTime - device.connectedAt.getTime()) > 60000 // 连接超过1分钟才检查
|
||||
timeSinceLastSeen > INACTIVE_THRESHOLD_MS &&
|
||||
timeSinceConnected > MIN_CONNECTION_AGE_MS
|
||||
|
||||
if (shouldRemove) {
|
||||
this.logger.warn(`⚠️ 确认设备真正断开: ${device.id} (${device.name})`)
|
||||
this.logger.warn(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`)
|
||||
this.logger.warn(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`)
|
||||
this.logger.warn(` - 连接时长: ${Math.round((currentTime - device.connectedAt.getTime()) / 1000)}秒`)
|
||||
this.logger.warn(`[Consistency] Device ${device.id} (${device.name}) confirmed disconnected`)
|
||||
this.logger.warn(` socketExists=${socketExists}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s, connAge=${Math.round(timeSinceConnected / 1000)}s`)
|
||||
|
||||
// 🔧 优化:适中的二次确认延迟,快速清理真正断开的设备
|
||||
// Secondary check after 5s delay
|
||||
setTimeout(() => {
|
||||
this.performSecondaryDeviceCheck(device.id, device.socketId)
|
||||
}, 3000) // 3秒后二次确认
|
||||
|
||||
} else {
|
||||
// 设备状态正常或在容错范围内
|
||||
if (!socketExists || !socketConnected) {
|
||||
this.logger.debug(`⏸️ 设备 ${device.id} Socket状态异常但在容错范围内 (最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前)`)
|
||||
}
|
||||
}, 5000)
|
||||
} else if (!socketExists || !socketConnected) {
|
||||
this.logger.debug(`[Consistency] Device ${device.id} socket abnormal but within tolerance (lastSeen: ${Math.round(timeSinceLastSeen / 1000)}s ago)`)
|
||||
}
|
||||
}
|
||||
|
||||
if (fixedCount > 0) {
|
||||
this.logger.info(`🔧 状态一致性检查完成,修复了 ${fixedCount} 个不一致状态`)
|
||||
} else {
|
||||
this.logger.debug(`✅ 状态一致性检查完成,所有设备状态正常`)
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
this.logger.error('状态一致性检查失败:', error)
|
||||
this.logger.error('[Consistency] Check failed:', error)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2061,13 +2061,13 @@ class RemoteControlServer {
|
||||
try {
|
||||
const device = this.deviceManager.getDevice(deviceId)
|
||||
if (!device) {
|
||||
this.logger.debug(`📋 验证断开时设备 ${deviceId} 已不在内存中,可能已被其他逻辑清理`)
|
||||
this.logger.debug(`[Verify] Device ${deviceId} already removed from memory, skip`)
|
||||
return
|
||||
}
|
||||
|
||||
// 检查设备是否已经重新连接(新的Socket ID)
|
||||
// Device reconnected with a new socket - skip cleanup
|
||||
if (device.socketId !== socketId) {
|
||||
this.logger.info(`✅ 设备 ${deviceId} 已重新连接,新Socket: ${device.socketId},跳过断开处理`)
|
||||
this.logger.info(`[Verify] Device ${deviceId} reconnected with new socket: ${device.socketId}, skip`)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -2075,32 +2075,36 @@ class RemoteControlServer {
|
||||
const currentTime = Date.now()
|
||||
const timeSinceLastSeen = currentTime - device.lastSeen.getTime()
|
||||
|
||||
// 🔧 优化:区分不同断开场景的检查条件
|
||||
const socketExists = !!socket
|
||||
const socketConnected = socket?.connected || false
|
||||
const hasRecentActivity = timeSinceLastSeen < 5000 // 5秒内有活动
|
||||
const hasRecentActivity = timeSinceLastSeen < 10000 // 10s recent activity window
|
||||
|
||||
this.logger.info(`🔍 验证设备 ${deviceId} 断开状态:`)
|
||||
this.logger.info(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`)
|
||||
this.logger.info(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`)
|
||||
this.logger.info(` - 近期活跃: ${hasRecentActivity}`)
|
||||
this.logger.info(`[Verify] Device ${deviceId}: socketExists=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`)
|
||||
|
||||
// 🔧 关键优化:如果Socket不存在,很可能是真正的断开
|
||||
// Socket completely gone - confirm real disconnect
|
||||
if (!socketExists) {
|
||||
this.logger.warn(`❌ Socket完全不存在,确认设备真实断开: ${deviceId}`)
|
||||
// Double check: if device had recent activity, give more time
|
||||
if (hasRecentActivity) {
|
||||
this.logger.info(`[Verify] Socket gone but device ${deviceId} had recent activity, defer cleanup by 10s`)
|
||||
setTimeout(() => {
|
||||
this.performSecondaryDeviceCheck(deviceId, socketId)
|
||||
}, 10000)
|
||||
return
|
||||
}
|
||||
this.logger.warn(`[Verify] Socket gone and no recent activity, cleanup device: ${deviceId}`)
|
||||
this.executeDeviceCleanup(deviceId, device)
|
||||
return
|
||||
}
|
||||
|
||||
// 🔧 如果Socket存在但未连接,且无近期活动,尝试主动测试连接
|
||||
// Socket exists but not connected, and no recent activity - test connection
|
||||
if (!socketConnected && !hasRecentActivity) {
|
||||
this.logger.warn(`🔍 Socket存在但未连接,主动测试设备连接: ${deviceId}`)
|
||||
this.logger.warn(`[Verify] Socket exists but not connected, testing device: ${deviceId}`)
|
||||
this.testDeviceConnection(deviceId, socketId, device)
|
||||
return
|
||||
}
|
||||
|
||||
// 设备状态正常,确保Web端知道设备在线
|
||||
this.logger.info(`✅ 验证结果:设备 ${deviceId} 仍然在线,disconnect事件是误报`)
|
||||
// Device still alive - broadcast online status
|
||||
this.logger.info(`[Verify] Device ${deviceId} still online, disconnect was false alarm`)
|
||||
this.webClientManager.broadcastToAll('device_status_update', {
|
||||
deviceId: device.id,
|
||||
status: {
|
||||
@@ -2112,7 +2116,7 @@ class RemoteControlServer {
|
||||
})
|
||||
|
||||
} catch (error) {
|
||||
this.logger.error(`验证设备断开失败 (${deviceId}):`, error)
|
||||
this.logger.error(`[Verify] Failed for device ${deviceId}:`, error)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2122,39 +2126,35 @@ class RemoteControlServer {
|
||||
private testDeviceConnection(deviceId: string, socketId: string, device: any): void {
|
||||
const socket = this.io.sockets.sockets.get(socketId)
|
||||
if (!socket) {
|
||||
this.logger.warn(`❌ 测试连接时Socket已不存在: ${deviceId}`)
|
||||
this.logger.warn(`[ConnTest] Socket gone for device ${deviceId}`)
|
||||
this.executeDeviceCleanup(deviceId, device)
|
||||
return
|
||||
}
|
||||
|
||||
this.logger.info(`📡 向设备 ${deviceId} 发送连接测试`)
|
||||
this.logger.info(`[ConnTest] Testing device ${deviceId}`)
|
||||
|
||||
// 设置响应超时
|
||||
let responded = false
|
||||
const CONNECTION_TEST_TIMEOUT_MS = 15000 // 15s timeout for polling transport
|
||||
const timeout = setTimeout(() => {
|
||||
if (!responded) {
|
||||
this.logger.warn(`⏰ 设备 ${deviceId} 连接测试超时,确认断开`)
|
||||
this.logger.warn(`[ConnTest] Device ${deviceId} timed out after ${CONNECTION_TEST_TIMEOUT_MS}ms`)
|
||||
this.executeDeviceCleanup(deviceId, device)
|
||||
}
|
||||
}, 5000) // 5秒超时
|
||||
}, CONNECTION_TEST_TIMEOUT_MS)
|
||||
|
||||
// 发送测试ping
|
||||
try {
|
||||
socket.emit('CONNECTION_TEST', {
|
||||
timestamp: Date.now(),
|
||||
testId: `verify_${Date.now()}`
|
||||
})
|
||||
|
||||
// 监听一次性响应
|
||||
const responseHandler = (data: any) => {
|
||||
responded = true
|
||||
clearTimeout(timeout)
|
||||
this.logger.info(`✅ 设备 ${deviceId} 连接测试成功,设备仍在线`)
|
||||
this.logger.info(`[ConnTest] Device ${deviceId} responded, still online`)
|
||||
|
||||
// 更新设备活跃时间
|
||||
device.lastSeen = new Date()
|
||||
|
||||
// 确保Web端知道设备在线
|
||||
this.webClientManager.broadcastToAll('device_status_update', {
|
||||
deviceId: device.id,
|
||||
status: {
|
||||
@@ -2165,7 +2165,6 @@ class RemoteControlServer {
|
||||
}
|
||||
})
|
||||
|
||||
// 清理监听器
|
||||
socket.off('CONNECTION_TEST_RESPONSE', responseHandler)
|
||||
}
|
||||
|
||||
@@ -2174,7 +2173,7 @@ class RemoteControlServer {
|
||||
} catch (error) {
|
||||
responded = true
|
||||
clearTimeout(timeout)
|
||||
this.logger.error(`❌ 发送连接测试失败: ${deviceId}`, error)
|
||||
this.logger.error(`[ConnTest] Failed to send test to device ${deviceId}:`, error)
|
||||
this.executeDeviceCleanup(deviceId, device)
|
||||
}
|
||||
}
|
||||
@@ -2183,28 +2182,26 @@ class RemoteControlServer {
|
||||
* 🆕 执行设备清理逻辑
|
||||
*/
|
||||
private executeDeviceCleanup(deviceId: string, device: any): void {
|
||||
this.logger.warn(`🧹 执行设备清理: ${deviceId} (${device.name})`)
|
||||
this.logger.warn(`[Cleanup] Removing device: ${deviceId} (${device.name})`)
|
||||
|
||||
// 释放控制权
|
||||
// Release control if held
|
||||
const controllerId = this.webClientManager.getDeviceController(deviceId)
|
||||
if (controllerId) {
|
||||
this.logger.info(`🔓 设备断开,自动释放控制权: ${deviceId} (控制者: ${controllerId})`)
|
||||
this.logger.info(`[Cleanup] Releasing control for device ${deviceId} (controller: ${controllerId})`)
|
||||
this.webClientManager.releaseDeviceControl(deviceId)
|
||||
|
||||
// 通知控制的Web客户端设备已断开
|
||||
this.webClientManager.sendToClient(controllerId, 'device_control_lost', {
|
||||
deviceId: deviceId,
|
||||
reason: 'device_disconnected',
|
||||
message: '设备已断开连接'
|
||||
message: 'Device disconnected'
|
||||
})
|
||||
}
|
||||
|
||||
// 清理设备
|
||||
this.deviceManager.removeDevice(deviceId)
|
||||
this.databaseService.setDeviceOffline(deviceId)
|
||||
this.webClientManager.broadcastToAll('device_disconnected', deviceId)
|
||||
|
||||
this.logger.info(`✅ 已清理断开的设备: ${device.name} (${deviceId})`)
|
||||
this.logger.info(`[Cleanup] Device removed: ${device.name} (${deviceId})`)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2214,30 +2211,32 @@ class RemoteControlServer {
|
||||
try {
|
||||
const device = this.deviceManager.getDevice(deviceId)
|
||||
if (!device) {
|
||||
this.logger.debug(`📋 二次检查时设备 ${deviceId} 已不在内存中,跳过`)
|
||||
this.logger.debug(`[SecondaryCheck] Device ${deviceId} already removed, skip`)
|
||||
return
|
||||
}
|
||||
|
||||
const socket = this.io.sockets.sockets.get(socketId)
|
||||
// Device reconnected with new socket
|
||||
if (device.socketId !== socketId) {
|
||||
this.logger.info(`[SecondaryCheck] Device ${deviceId} reconnected with new socket ${device.socketId}, skip`)
|
||||
return
|
||||
}
|
||||
|
||||
const socket = this.io.sockets.sockets.get(device.socketId)
|
||||
const currentTime = Date.now()
|
||||
const timeSinceLastSeen = currentTime - device.lastSeen.getTime()
|
||||
|
||||
// 🔧 优化:二次检查条件更合理,60秒无活动就考虑断开
|
||||
const socketExists = !!socket
|
||||
const socketConnected = socket?.connected || false
|
||||
const isInactive = timeSinceLastSeen > 60000 // 1分钟无活动
|
||||
const INACTIVE_THRESHOLD_MS = 90000 // 90s no activity
|
||||
|
||||
this.logger.info(`🔍 二次确认设备 ${deviceId} 状态:`)
|
||||
this.logger.info(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`)
|
||||
this.logger.info(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`)
|
||||
this.logger.info(`[SecondaryCheck] Device ${deviceId}: socket=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`)
|
||||
|
||||
if (!socketExists || (!socketConnected && isInactive)) {
|
||||
this.logger.warn(`❌ 二次确认:设备 ${deviceId} 确实已断开,执行清理`)
|
||||
if (!socketExists || (!socketConnected && timeSinceLastSeen > INACTIVE_THRESHOLD_MS)) {
|
||||
this.logger.warn(`[SecondaryCheck] Device ${deviceId} confirmed disconnected, cleanup`)
|
||||
this.executeDeviceCleanup(deviceId, device)
|
||||
} else {
|
||||
this.logger.info(`✅ 二次确认:设备 ${deviceId} 状态正常,保持连接`)
|
||||
this.logger.info(`[SecondaryCheck] Device ${deviceId} still alive`)
|
||||
|
||||
// 设备状态恢复正常,确保Web端知道设备在线
|
||||
if (socketExists && socketConnected) {
|
||||
this.webClientManager.broadcastToAll('device_status_update', {
|
||||
deviceId: device.id,
|
||||
@@ -2252,7 +2251,7 @@ class RemoteControlServer {
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
this.logger.error(`二次设备检查失败 (${deviceId}):`, error)
|
||||
this.logger.error(`[SecondaryCheck] Failed for device ${deviceId}:`, error)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user