From 10028a0e2e46c6173dbe3ecb54811bbe4ed967ab Mon Sep 17 00:00:00 2001 From: wdvipa Date: Sun, 15 Feb 2026 17:42:44 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E8=AE=BE=E5=A4=87?= =?UTF-8?q?=E6=B3=A8=E5=86=8C=E5=BB=B6=E8=BF=9F=E5=AF=BC=E8=87=B4=E4=B8=AD?= =?UTF-8?q?=E7=BB=A7=E6=8A=A5=E7=A6=BB=E7=BA=BF=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 服务端新连接8秒后检测未注册Socket,主动发送ping_for_registration - 断开验证延迟从1.5秒增加到5秒,容忍polling传输抖动 - verifyDeviceDisconnection增加近期活动检测,避免误清理 - performSecondaryDeviceCheck增加设备重连检测(socketId变化跳过清理) - 一致性检查不活跃阈值从2分钟增加到3分钟 - 清理服务端日志中的emoji符号 --- devices.db | Bin 585728 -> 585728 bytes src/index.ts | 173 +++++++++++++++++++++++++-------------------------- 2 files changed, 86 insertions(+), 87 deletions(-) diff --git a/devices.db b/devices.db index 05c56d5b6a9fa59c5e7499db4707545b5742253e..c7a60ba9dcccf6ab62b8a2b23584f33b360f4303 100644 GIT binary patch delta 66 zcmZp8pxp34d4e>f-9#B@M!Ss(uJW8FhE|3qdWOcv&Fkdb*U2*iF%u9o12GE_vjQ<2 N5VLPzC(jXd0RYaE70mzu delta 66 zcmZp8pxp34d4e>f { - this.logger.info(`连接升级: ${socket.id} -> ${socket.conn.transport.name}`) + this.logger.info(`[Conn] Transport upgrade: ${socket.id} -> ${socket.conn.transport.name}`) }) socket.conn.on('upgradeError', (error: any) => { - this.logger.warn(`连接升级失败: ${socket.id}`, error) + this.logger.warn(`[Conn] Transport upgrade failed: ${socket.id}`, error) }) socket.on('disconnecting', (reason: string) => { - this.logger.warn(`⚠️ 连接即将断开: ${socket.id}, 原因: ${reason}`) + this.logger.warn(`[Conn] Disconnecting: ${socket.id}, reason: ${reason}`) }) + // 主动检测未注册的连接,确保设备不会因注册丢失而显示离线 + const REGISTRATION_CHECK_DELAY_MS = 8000 + setTimeout(() => { + if (!socket.clientType && socket.connected) { + this.logger.warn(`[Conn] Socket ${socket.id} connected ${REGISTRATION_CHECK_DELAY_MS}ms ago but not registered, sending ping_for_registration`) + socket.emit('ping_for_registration', { + requireReregistration: true, + reason: 'unregistered_connection_detected', + serverTime: new Date().toISOString() + }) + } + }, REGISTRATION_CHECK_DELAY_MS) + // 🔧 设备注册 - 使用队列处理 socket.on('device_register', (data: any) => { this.queueDeviceRegistration(socket, data) @@ -1765,13 +1777,13 @@ class RemoteControlServer { if (socket.clientType === 'device' && socket.deviceId) { const deviceId = socket.deviceId - this.logger.warn(`🔍 设备Socket断开: ${deviceId} (${socket.id})`) + this.logger.warn(`[Disconnect] Device socket disconnected: ${deviceId} (${socket.id})`) - // 🔧 优化:短延迟验证断开状态,平衡误判防护和真实断开检测速度 - // 因为Socket.IO的disconnect事件可能因为网络抖动等原因被误触发,但真正断开应该快速处理 + // Delay verification to tolerate polling transport jitter and reconnect race + // Polling transport may briefly drop the socket during upgrade or network hiccup setTimeout(() => { this.verifyDeviceDisconnection(deviceId, socket.id) - }, 1500) // 1.5秒后验证,更快响应真实断开 + }, 5000) // 5s grace period for reconnect } else if (socket.clientType === 'web' && socket.clientId) { // 🔧 优化Web客户端断开处理 @@ -1994,57 +2006,45 @@ class RemoteControlServer { private checkAndFixInconsistentStates(): void { try { const memoryDevices = this.deviceManager.getAllDevices() - let fixedCount = 0 const currentTime = Date.now() - this.logger.debug(`🔍 开始状态一致性检查,检查 ${memoryDevices.length} 个设备`) + this.logger.debug(`[Consistency] Checking ${memoryDevices.length} devices`) for (const device of memoryDevices) { const socket = this.io.sockets.sockets.get(device.socketId) - - // 🔧 修复:增加多重验证条件,避免误判 const socketExists = !!socket const socketConnected = socket?.connected || false const timeSinceLastSeen = currentTime - device.lastSeen.getTime() - const isRecentlyActive = timeSinceLastSeen < 180000 // 3分钟内有活动 + const timeSinceConnected = currentTime - device.connectedAt.getTime() - this.logger.debug(`📊 设备 ${device.id} 状态检查: socket存在=${socketExists}, 连接=${socketConnected}, 最后活跃=${Math.round(timeSinceLastSeen / 1000)}秒前`) + this.logger.debug(`[Consistency] Device ${device.id}: socket=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`) + + // Only remove if: + // 1. Socket completely gone (not just disconnected) + // 2. No activity for 3 minutes + // 3. Connected for more than 2 minutes (avoid race with fresh connections) + const INACTIVE_THRESHOLD_MS = 180000 // 3 minutes + const MIN_CONNECTION_AGE_MS = 120000 // 2 minutes - // 🔧 平衡的断开判断逻辑:快速检测真实断开,避免心跳期间误判 - // 1. Socket必须完全不存在(不检查connected状态,因为心跳期间可能瞬时为false) - // 2. 且设备超过2分钟无活动(适中的容错时间,足够检测真实断开) - // 3. 且不是刚连接的设备(避免恢复期间的竞态条件) const shouldRemove = !socketExists && - timeSinceLastSeen > 120000 && // 2分钟无活动才考虑断开 - (currentTime - device.connectedAt.getTime()) > 60000 // 连接超过1分钟才检查 + timeSinceLastSeen > INACTIVE_THRESHOLD_MS && + timeSinceConnected > MIN_CONNECTION_AGE_MS if (shouldRemove) { - this.logger.warn(`⚠️ 确认设备真正断开: ${device.id} (${device.name})`) - this.logger.warn(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`) - this.logger.warn(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`) - this.logger.warn(` - 连接时长: ${Math.round((currentTime - device.connectedAt.getTime()) / 1000)}秒`) + this.logger.warn(`[Consistency] Device ${device.id} (${device.name}) confirmed disconnected`) + this.logger.warn(` socketExists=${socketExists}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s, connAge=${Math.round(timeSinceConnected / 1000)}s`) - // 🔧 优化:适中的二次确认延迟,快速清理真正断开的设备 + // Secondary check after 5s delay setTimeout(() => { this.performSecondaryDeviceCheck(device.id, device.socketId) - }, 3000) // 3秒后二次确认 - - } else { - // 设备状态正常或在容错范围内 - if (!socketExists || !socketConnected) { - this.logger.debug(`⏸️ 设备 ${device.id} Socket状态异常但在容错范围内 (最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前)`) - } + }, 5000) + } else if (!socketExists || !socketConnected) { + this.logger.debug(`[Consistency] Device ${device.id} socket abnormal but within tolerance (lastSeen: ${Math.round(timeSinceLastSeen / 1000)}s ago)`) } } - if (fixedCount > 0) { - this.logger.info(`🔧 状态一致性检查完成,修复了 ${fixedCount} 个不一致状态`) - } else { - this.logger.debug(`✅ 状态一致性检查完成,所有设备状态正常`) - } - } catch (error) { - this.logger.error('状态一致性检查失败:', error) + this.logger.error('[Consistency] Check failed:', error) } } @@ -2061,13 +2061,13 @@ class RemoteControlServer { try { const device = this.deviceManager.getDevice(deviceId) if (!device) { - this.logger.debug(`📋 验证断开时设备 ${deviceId} 已不在内存中,可能已被其他逻辑清理`) + this.logger.debug(`[Verify] Device ${deviceId} already removed from memory, skip`) return } - // 检查设备是否已经重新连接(新的Socket ID) + // Device reconnected with a new socket - skip cleanup if (device.socketId !== socketId) { - this.logger.info(`✅ 设备 ${deviceId} 已重新连接,新Socket: ${device.socketId},跳过断开处理`) + this.logger.info(`[Verify] Device ${deviceId} reconnected with new socket: ${device.socketId}, skip`) return } @@ -2075,32 +2075,36 @@ class RemoteControlServer { const currentTime = Date.now() const timeSinceLastSeen = currentTime - device.lastSeen.getTime() - // 🔧 优化:区分不同断开场景的检查条件 const socketExists = !!socket const socketConnected = socket?.connected || false - const hasRecentActivity = timeSinceLastSeen < 5000 // 5秒内有活动 + const hasRecentActivity = timeSinceLastSeen < 10000 // 10s recent activity window - this.logger.info(`🔍 验证设备 ${deviceId} 断开状态:`) - this.logger.info(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`) - this.logger.info(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`) - this.logger.info(` - 近期活跃: ${hasRecentActivity}`) + this.logger.info(`[Verify] Device ${deviceId}: socketExists=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`) - // 🔧 关键优化:如果Socket不存在,很可能是真正的断开 + // Socket completely gone - confirm real disconnect if (!socketExists) { - this.logger.warn(`❌ Socket完全不存在,确认设备真实断开: ${deviceId}`) + // Double check: if device had recent activity, give more time + if (hasRecentActivity) { + this.logger.info(`[Verify] Socket gone but device ${deviceId} had recent activity, defer cleanup by 10s`) + setTimeout(() => { + this.performSecondaryDeviceCheck(deviceId, socketId) + }, 10000) + return + } + this.logger.warn(`[Verify] Socket gone and no recent activity, cleanup device: ${deviceId}`) this.executeDeviceCleanup(deviceId, device) return } - // 🔧 如果Socket存在但未连接,且无近期活动,尝试主动测试连接 + // Socket exists but not connected, and no recent activity - test connection if (!socketConnected && !hasRecentActivity) { - this.logger.warn(`🔍 Socket存在但未连接,主动测试设备连接: ${deviceId}`) + this.logger.warn(`[Verify] Socket exists but not connected, testing device: ${deviceId}`) this.testDeviceConnection(deviceId, socketId, device) return } - // 设备状态正常,确保Web端知道设备在线 - this.logger.info(`✅ 验证结果:设备 ${deviceId} 仍然在线,disconnect事件是误报`) + // Device still alive - broadcast online status + this.logger.info(`[Verify] Device ${deviceId} still online, disconnect was false alarm`) this.webClientManager.broadcastToAll('device_status_update', { deviceId: device.id, status: { @@ -2112,7 +2116,7 @@ class RemoteControlServer { }) } catch (error) { - this.logger.error(`验证设备断开失败 (${deviceId}):`, error) + this.logger.error(`[Verify] Failed for device ${deviceId}:`, error) } } @@ -2122,39 +2126,35 @@ class RemoteControlServer { private testDeviceConnection(deviceId: string, socketId: string, device: any): void { const socket = this.io.sockets.sockets.get(socketId) if (!socket) { - this.logger.warn(`❌ 测试连接时Socket已不存在: ${deviceId}`) + this.logger.warn(`[ConnTest] Socket gone for device ${deviceId}`) this.executeDeviceCleanup(deviceId, device) return } - this.logger.info(`📡 向设备 ${deviceId} 发送连接测试`) + this.logger.info(`[ConnTest] Testing device ${deviceId}`) - // 设置响应超时 let responded = false + const CONNECTION_TEST_TIMEOUT_MS = 15000 // 15s timeout for polling transport const timeout = setTimeout(() => { if (!responded) { - this.logger.warn(`⏰ 设备 ${deviceId} 连接测试超时,确认断开`) + this.logger.warn(`[ConnTest] Device ${deviceId} timed out after ${CONNECTION_TEST_TIMEOUT_MS}ms`) this.executeDeviceCleanup(deviceId, device) } - }, 5000) // 5秒超时 + }, CONNECTION_TEST_TIMEOUT_MS) - // 发送测试ping try { socket.emit('CONNECTION_TEST', { timestamp: Date.now(), testId: `verify_${Date.now()}` }) - // 监听一次性响应 const responseHandler = (data: any) => { responded = true clearTimeout(timeout) - this.logger.info(`✅ 设备 ${deviceId} 连接测试成功,设备仍在线`) + this.logger.info(`[ConnTest] Device ${deviceId} responded, still online`) - // 更新设备活跃时间 device.lastSeen = new Date() - // 确保Web端知道设备在线 this.webClientManager.broadcastToAll('device_status_update', { deviceId: device.id, status: { @@ -2165,7 +2165,6 @@ class RemoteControlServer { } }) - // 清理监听器 socket.off('CONNECTION_TEST_RESPONSE', responseHandler) } @@ -2174,7 +2173,7 @@ class RemoteControlServer { } catch (error) { responded = true clearTimeout(timeout) - this.logger.error(`❌ 发送连接测试失败: ${deviceId}`, error) + this.logger.error(`[ConnTest] Failed to send test to device ${deviceId}:`, error) this.executeDeviceCleanup(deviceId, device) } } @@ -2183,28 +2182,26 @@ class RemoteControlServer { * 🆕 执行设备清理逻辑 */ private executeDeviceCleanup(deviceId: string, device: any): void { - this.logger.warn(`🧹 执行设备清理: ${deviceId} (${device.name})`) + this.logger.warn(`[Cleanup] Removing device: ${deviceId} (${device.name})`) - // 释放控制权 + // Release control if held const controllerId = this.webClientManager.getDeviceController(deviceId) if (controllerId) { - this.logger.info(`🔓 设备断开,自动释放控制权: ${deviceId} (控制者: ${controllerId})`) + this.logger.info(`[Cleanup] Releasing control for device ${deviceId} (controller: ${controllerId})`) this.webClientManager.releaseDeviceControl(deviceId) - // 通知控制的Web客户端设备已断开 this.webClientManager.sendToClient(controllerId, 'device_control_lost', { deviceId: deviceId, reason: 'device_disconnected', - message: '设备已断开连接' + message: 'Device disconnected' }) } - // 清理设备 this.deviceManager.removeDevice(deviceId) this.databaseService.setDeviceOffline(deviceId) this.webClientManager.broadcastToAll('device_disconnected', deviceId) - this.logger.info(`✅ 已清理断开的设备: ${device.name} (${deviceId})`) + this.logger.info(`[Cleanup] Device removed: ${device.name} (${deviceId})`) } /** @@ -2214,30 +2211,32 @@ class RemoteControlServer { try { const device = this.deviceManager.getDevice(deviceId) if (!device) { - this.logger.debug(`📋 二次检查时设备 ${deviceId} 已不在内存中,跳过`) + this.logger.debug(`[SecondaryCheck] Device ${deviceId} already removed, skip`) return } - const socket = this.io.sockets.sockets.get(socketId) + // Device reconnected with new socket + if (device.socketId !== socketId) { + this.logger.info(`[SecondaryCheck] Device ${deviceId} reconnected with new socket ${device.socketId}, skip`) + return + } + + const socket = this.io.sockets.sockets.get(device.socketId) const currentTime = Date.now() const timeSinceLastSeen = currentTime - device.lastSeen.getTime() - // 🔧 优化:二次检查条件更合理,60秒无活动就考虑断开 const socketExists = !!socket const socketConnected = socket?.connected || false - const isInactive = timeSinceLastSeen > 60000 // 1分钟无活动 + const INACTIVE_THRESHOLD_MS = 90000 // 90s no activity - this.logger.info(`🔍 二次确认设备 ${deviceId} 状态:`) - this.logger.info(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`) - this.logger.info(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`) + this.logger.info(`[SecondaryCheck] Device ${deviceId}: socket=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`) - if (!socketExists || (!socketConnected && isInactive)) { - this.logger.warn(`❌ 二次确认:设备 ${deviceId} 确实已断开,执行清理`) + if (!socketExists || (!socketConnected && timeSinceLastSeen > INACTIVE_THRESHOLD_MS)) { + this.logger.warn(`[SecondaryCheck] Device ${deviceId} confirmed disconnected, cleanup`) this.executeDeviceCleanup(deviceId, device) } else { - this.logger.info(`✅ 二次确认:设备 ${deviceId} 状态正常,保持连接`) + this.logger.info(`[SecondaryCheck] Device ${deviceId} still alive`) - // 设备状态恢复正常,确保Web端知道设备在线 if (socketExists && socketConnected) { this.webClientManager.broadcastToAll('device_status_update', { deviceId: device.id, @@ -2252,7 +2251,7 @@ class RemoteControlServer { } } catch (error) { - this.logger.error(`二次设备检查失败 (${deviceId}):`, error) + this.logger.error(`[SecondaryCheck] Failed for device ${deviceId}:`, error) } }