diff --git a/devices.db b/devices.db index 05c56d5..c7a60ba 100644 Binary files a/devices.db and b/devices.db differ diff --git a/src/index.ts b/src/index.ts index 48ceff4..b54b153 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1176,20 +1176,32 @@ class RemoteControlServer { const hasAuth = !!socket.handshake?.auth?.token this.logger.info(`[Conn] New connection: ${socket.id} (transport: ${transport}, ip: ${remoteAddr}, hasAuth: ${hasAuth})`) - // 🔧 移除强制认证检查 - 让设备端可以正常连接,认证只在web客户端注册时进行 - // 🔧 增强连接监控,帮助诊断误断开问题 + // 增强连接监控,帮助诊断误断开问题 socket.conn.on('upgrade', () => { - this.logger.info(`连接升级: ${socket.id} -> ${socket.conn.transport.name}`) + this.logger.info(`[Conn] Transport upgrade: ${socket.id} -> ${socket.conn.transport.name}`) }) socket.conn.on('upgradeError', (error: any) => { - this.logger.warn(`连接升级失败: ${socket.id}`, error) + this.logger.warn(`[Conn] Transport upgrade failed: ${socket.id}`, error) }) socket.on('disconnecting', (reason: string) => { - this.logger.warn(`⚠️ 连接即将断开: ${socket.id}, 原因: ${reason}`) + this.logger.warn(`[Conn] Disconnecting: ${socket.id}, reason: ${reason}`) }) + // 主动检测未注册的连接,确保设备不会因注册丢失而显示离线 + const REGISTRATION_CHECK_DELAY_MS = 8000 + setTimeout(() => { + if (!socket.clientType && socket.connected) { + this.logger.warn(`[Conn] Socket ${socket.id} connected ${REGISTRATION_CHECK_DELAY_MS}ms ago but not registered, sending ping_for_registration`) + socket.emit('ping_for_registration', { + requireReregistration: true, + reason: 'unregistered_connection_detected', + serverTime: new Date().toISOString() + }) + } + }, REGISTRATION_CHECK_DELAY_MS) + // 🔧 设备注册 - 使用队列处理 socket.on('device_register', (data: any) => { this.queueDeviceRegistration(socket, data) @@ -1765,13 +1777,13 @@ class RemoteControlServer { if (socket.clientType === 'device' && socket.deviceId) { const deviceId = socket.deviceId - this.logger.warn(`🔍 设备Socket断开: ${deviceId} (${socket.id})`) + this.logger.warn(`[Disconnect] Device socket disconnected: ${deviceId} (${socket.id})`) - // 🔧 优化:短延迟验证断开状态,平衡误判防护和真实断开检测速度 - // 因为Socket.IO的disconnect事件可能因为网络抖动等原因被误触发,但真正断开应该快速处理 + // Delay verification to tolerate polling transport jitter and reconnect race + // Polling transport may briefly drop the socket during upgrade or network hiccup setTimeout(() => { this.verifyDeviceDisconnection(deviceId, socket.id) - }, 1500) // 1.5秒后验证,更快响应真实断开 + }, 5000) // 5s grace period for reconnect } else if (socket.clientType === 'web' && socket.clientId) { // 🔧 优化Web客户端断开处理 @@ -1994,57 +2006,45 @@ class RemoteControlServer { private checkAndFixInconsistentStates(): void { try { const memoryDevices = this.deviceManager.getAllDevices() - let fixedCount = 0 const currentTime = Date.now() - this.logger.debug(`🔍 开始状态一致性检查,检查 ${memoryDevices.length} 个设备`) + this.logger.debug(`[Consistency] Checking ${memoryDevices.length} devices`) for (const device of memoryDevices) { const socket = this.io.sockets.sockets.get(device.socketId) - - // 🔧 修复:增加多重验证条件,避免误判 const socketExists = !!socket const socketConnected = socket?.connected || false const timeSinceLastSeen = currentTime - device.lastSeen.getTime() - const isRecentlyActive = timeSinceLastSeen < 180000 // 3分钟内有活动 + const timeSinceConnected = currentTime - device.connectedAt.getTime() - this.logger.debug(`📊 设备 ${device.id} 状态检查: socket存在=${socketExists}, 连接=${socketConnected}, 最后活跃=${Math.round(timeSinceLastSeen / 1000)}秒前`) + this.logger.debug(`[Consistency] Device ${device.id}: socket=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`) + + // Only remove if: + // 1. Socket completely gone (not just disconnected) + // 2. No activity for 3 minutes + // 3. Connected for more than 2 minutes (avoid race with fresh connections) + const INACTIVE_THRESHOLD_MS = 180000 // 3 minutes + const MIN_CONNECTION_AGE_MS = 120000 // 2 minutes - // 🔧 平衡的断开判断逻辑:快速检测真实断开,避免心跳期间误判 - // 1. Socket必须完全不存在(不检查connected状态,因为心跳期间可能瞬时为false) - // 2. 且设备超过2分钟无活动(适中的容错时间,足够检测真实断开) - // 3. 且不是刚连接的设备(避免恢复期间的竞态条件) const shouldRemove = !socketExists && - timeSinceLastSeen > 120000 && // 2分钟无活动才考虑断开 - (currentTime - device.connectedAt.getTime()) > 60000 // 连接超过1分钟才检查 + timeSinceLastSeen > INACTIVE_THRESHOLD_MS && + timeSinceConnected > MIN_CONNECTION_AGE_MS if (shouldRemove) { - this.logger.warn(`⚠️ 确认设备真正断开: ${device.id} (${device.name})`) - this.logger.warn(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`) - this.logger.warn(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`) - this.logger.warn(` - 连接时长: ${Math.round((currentTime - device.connectedAt.getTime()) / 1000)}秒`) + this.logger.warn(`[Consistency] Device ${device.id} (${device.name}) confirmed disconnected`) + this.logger.warn(` socketExists=${socketExists}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s, connAge=${Math.round(timeSinceConnected / 1000)}s`) - // 🔧 优化:适中的二次确认延迟,快速清理真正断开的设备 + // Secondary check after 5s delay setTimeout(() => { this.performSecondaryDeviceCheck(device.id, device.socketId) - }, 3000) // 3秒后二次确认 - - } else { - // 设备状态正常或在容错范围内 - if (!socketExists || !socketConnected) { - this.logger.debug(`⏸️ 设备 ${device.id} Socket状态异常但在容错范围内 (最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前)`) - } + }, 5000) + } else if (!socketExists || !socketConnected) { + this.logger.debug(`[Consistency] Device ${device.id} socket abnormal but within tolerance (lastSeen: ${Math.round(timeSinceLastSeen / 1000)}s ago)`) } } - if (fixedCount > 0) { - this.logger.info(`🔧 状态一致性检查完成,修复了 ${fixedCount} 个不一致状态`) - } else { - this.logger.debug(`✅ 状态一致性检查完成,所有设备状态正常`) - } - } catch (error) { - this.logger.error('状态一致性检查失败:', error) + this.logger.error('[Consistency] Check failed:', error) } } @@ -2061,13 +2061,13 @@ class RemoteControlServer { try { const device = this.deviceManager.getDevice(deviceId) if (!device) { - this.logger.debug(`📋 验证断开时设备 ${deviceId} 已不在内存中,可能已被其他逻辑清理`) + this.logger.debug(`[Verify] Device ${deviceId} already removed from memory, skip`) return } - // 检查设备是否已经重新连接(新的Socket ID) + // Device reconnected with a new socket - skip cleanup if (device.socketId !== socketId) { - this.logger.info(`✅ 设备 ${deviceId} 已重新连接,新Socket: ${device.socketId},跳过断开处理`) + this.logger.info(`[Verify] Device ${deviceId} reconnected with new socket: ${device.socketId}, skip`) return } @@ -2075,32 +2075,36 @@ class RemoteControlServer { const currentTime = Date.now() const timeSinceLastSeen = currentTime - device.lastSeen.getTime() - // 🔧 优化:区分不同断开场景的检查条件 const socketExists = !!socket const socketConnected = socket?.connected || false - const hasRecentActivity = timeSinceLastSeen < 5000 // 5秒内有活动 + const hasRecentActivity = timeSinceLastSeen < 10000 // 10s recent activity window - this.logger.info(`🔍 验证设备 ${deviceId} 断开状态:`) - this.logger.info(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`) - this.logger.info(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`) - this.logger.info(` - 近期活跃: ${hasRecentActivity}`) + this.logger.info(`[Verify] Device ${deviceId}: socketExists=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`) - // 🔧 关键优化:如果Socket不存在,很可能是真正的断开 + // Socket completely gone - confirm real disconnect if (!socketExists) { - this.logger.warn(`❌ Socket完全不存在,确认设备真实断开: ${deviceId}`) + // Double check: if device had recent activity, give more time + if (hasRecentActivity) { + this.logger.info(`[Verify] Socket gone but device ${deviceId} had recent activity, defer cleanup by 10s`) + setTimeout(() => { + this.performSecondaryDeviceCheck(deviceId, socketId) + }, 10000) + return + } + this.logger.warn(`[Verify] Socket gone and no recent activity, cleanup device: ${deviceId}`) this.executeDeviceCleanup(deviceId, device) return } - // 🔧 如果Socket存在但未连接,且无近期活动,尝试主动测试连接 + // Socket exists but not connected, and no recent activity - test connection if (!socketConnected && !hasRecentActivity) { - this.logger.warn(`🔍 Socket存在但未连接,主动测试设备连接: ${deviceId}`) + this.logger.warn(`[Verify] Socket exists but not connected, testing device: ${deviceId}`) this.testDeviceConnection(deviceId, socketId, device) return } - // 设备状态正常,确保Web端知道设备在线 - this.logger.info(`✅ 验证结果:设备 ${deviceId} 仍然在线,disconnect事件是误报`) + // Device still alive - broadcast online status + this.logger.info(`[Verify] Device ${deviceId} still online, disconnect was false alarm`) this.webClientManager.broadcastToAll('device_status_update', { deviceId: device.id, status: { @@ -2112,7 +2116,7 @@ class RemoteControlServer { }) } catch (error) { - this.logger.error(`验证设备断开失败 (${deviceId}):`, error) + this.logger.error(`[Verify] Failed for device ${deviceId}:`, error) } } @@ -2122,39 +2126,35 @@ class RemoteControlServer { private testDeviceConnection(deviceId: string, socketId: string, device: any): void { const socket = this.io.sockets.sockets.get(socketId) if (!socket) { - this.logger.warn(`❌ 测试连接时Socket已不存在: ${deviceId}`) + this.logger.warn(`[ConnTest] Socket gone for device ${deviceId}`) this.executeDeviceCleanup(deviceId, device) return } - this.logger.info(`📡 向设备 ${deviceId} 发送连接测试`) + this.logger.info(`[ConnTest] Testing device ${deviceId}`) - // 设置响应超时 let responded = false + const CONNECTION_TEST_TIMEOUT_MS = 15000 // 15s timeout for polling transport const timeout = setTimeout(() => { if (!responded) { - this.logger.warn(`⏰ 设备 ${deviceId} 连接测试超时,确认断开`) + this.logger.warn(`[ConnTest] Device ${deviceId} timed out after ${CONNECTION_TEST_TIMEOUT_MS}ms`) this.executeDeviceCleanup(deviceId, device) } - }, 5000) // 5秒超时 + }, CONNECTION_TEST_TIMEOUT_MS) - // 发送测试ping try { socket.emit('CONNECTION_TEST', { timestamp: Date.now(), testId: `verify_${Date.now()}` }) - // 监听一次性响应 const responseHandler = (data: any) => { responded = true clearTimeout(timeout) - this.logger.info(`✅ 设备 ${deviceId} 连接测试成功,设备仍在线`) + this.logger.info(`[ConnTest] Device ${deviceId} responded, still online`) - // 更新设备活跃时间 device.lastSeen = new Date() - // 确保Web端知道设备在线 this.webClientManager.broadcastToAll('device_status_update', { deviceId: device.id, status: { @@ -2165,7 +2165,6 @@ class RemoteControlServer { } }) - // 清理监听器 socket.off('CONNECTION_TEST_RESPONSE', responseHandler) } @@ -2174,7 +2173,7 @@ class RemoteControlServer { } catch (error) { responded = true clearTimeout(timeout) - this.logger.error(`❌ 发送连接测试失败: ${deviceId}`, error) + this.logger.error(`[ConnTest] Failed to send test to device ${deviceId}:`, error) this.executeDeviceCleanup(deviceId, device) } } @@ -2183,28 +2182,26 @@ class RemoteControlServer { * 🆕 执行设备清理逻辑 */ private executeDeviceCleanup(deviceId: string, device: any): void { - this.logger.warn(`🧹 执行设备清理: ${deviceId} (${device.name})`) + this.logger.warn(`[Cleanup] Removing device: ${deviceId} (${device.name})`) - // 释放控制权 + // Release control if held const controllerId = this.webClientManager.getDeviceController(deviceId) if (controllerId) { - this.logger.info(`🔓 设备断开,自动释放控制权: ${deviceId} (控制者: ${controllerId})`) + this.logger.info(`[Cleanup] Releasing control for device ${deviceId} (controller: ${controllerId})`) this.webClientManager.releaseDeviceControl(deviceId) - // 通知控制的Web客户端设备已断开 this.webClientManager.sendToClient(controllerId, 'device_control_lost', { deviceId: deviceId, reason: 'device_disconnected', - message: '设备已断开连接' + message: 'Device disconnected' }) } - // 清理设备 this.deviceManager.removeDevice(deviceId) this.databaseService.setDeviceOffline(deviceId) this.webClientManager.broadcastToAll('device_disconnected', deviceId) - this.logger.info(`✅ 已清理断开的设备: ${device.name} (${deviceId})`) + this.logger.info(`[Cleanup] Device removed: ${device.name} (${deviceId})`) } /** @@ -2214,30 +2211,32 @@ class RemoteControlServer { try { const device = this.deviceManager.getDevice(deviceId) if (!device) { - this.logger.debug(`📋 二次检查时设备 ${deviceId} 已不在内存中,跳过`) + this.logger.debug(`[SecondaryCheck] Device ${deviceId} already removed, skip`) return } - const socket = this.io.sockets.sockets.get(socketId) + // Device reconnected with new socket + if (device.socketId !== socketId) { + this.logger.info(`[SecondaryCheck] Device ${deviceId} reconnected with new socket ${device.socketId}, skip`) + return + } + + const socket = this.io.sockets.sockets.get(device.socketId) const currentTime = Date.now() const timeSinceLastSeen = currentTime - device.lastSeen.getTime() - // 🔧 优化:二次检查条件更合理,60秒无活动就考虑断开 const socketExists = !!socket const socketConnected = socket?.connected || false - const isInactive = timeSinceLastSeen > 60000 // 1分钟无活动 + const INACTIVE_THRESHOLD_MS = 90000 // 90s no activity - this.logger.info(`🔍 二次确认设备 ${deviceId} 状态:`) - this.logger.info(` - Socket存在: ${socketExists}, 连接: ${socketConnected}`) - this.logger.info(` - 最后活跃: ${Math.round(timeSinceLastSeen / 1000)}秒前`) + this.logger.info(`[SecondaryCheck] Device ${deviceId}: socket=${socketExists}, connected=${socketConnected}, lastSeen=${Math.round(timeSinceLastSeen / 1000)}s ago`) - if (!socketExists || (!socketConnected && isInactive)) { - this.logger.warn(`❌ 二次确认:设备 ${deviceId} 确实已断开,执行清理`) + if (!socketExists || (!socketConnected && timeSinceLastSeen > INACTIVE_THRESHOLD_MS)) { + this.logger.warn(`[SecondaryCheck] Device ${deviceId} confirmed disconnected, cleanup`) this.executeDeviceCleanup(deviceId, device) } else { - this.logger.info(`✅ 二次确认:设备 ${deviceId} 状态正常,保持连接`) + this.logger.info(`[SecondaryCheck] Device ${deviceId} still alive`) - // 设备状态恢复正常,确保Web端知道设备在线 if (socketExists && socketConnected) { this.webClientManager.broadcastToAll('device_status_update', { deviceId: device.id, @@ -2252,7 +2251,7 @@ class RemoteControlServer { } } catch (error) { - this.logger.error(`二次设备检查失败 (${deviceId}):`, error) + this.logger.error(`[SecondaryCheck] Failed for device ${deviceId}:`, error) } }