From b670794d9aeeaa6d8bdf2aaa384dd9a51b4be512 Mon Sep 17 00:00:00 2001 From: laforetbrut Date: Wed, 22 Apr 2026 07:16:47 +0200 Subject: [PATCH] Phase 9: cap wait time on alive-peer ghost sessions (fixes 30-60s join delay) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reproduction (from production logs, 2026-04-22): 02:54:13 - 02:54:44 player 724b9ff8 waits 30s for server 1708833664 (60 attempts) 02:54:31 - 02:55:02 player 46284b41 waits 30s for server 0 (zombie) 05:10:53 - 05:11:55 player 95d0db86 waits 62s for server 1708833664 (120 attempts) 05:10:59 - 05:12:01 player 724b9ff8 waits 62s for server 1708833664 (120 attempts) User report: 'un joueur se connecte et son inventaire s'affiche 30 secondes après sa connexion'. Root cause: doPlayerJoin's last_server poll waits for the previous server to clear online=0. If the peer is alive (heartbeat fresh) but the player is ghost-online there (proxy bypass, network drop, or actively playing on the other server without clean logout), the peer NEVER flushes → we wait the full join_poll_max_attempts * join_poll_interval_ms (60s default) for nothing. Meanwhile the player sees an empty inventory on this server. The zombie-peer short-circuit already handled dead peers. This commit adds the complementary case: ALIVE peers with a stuck session. Fix: - New config key join_peer_alive_max_wait_seconds (default 5, range 0-600). - When the peer's heartbeat is fresh but player.online is still 1, wait at most this many seconds, then force-claim ownership by setting online=0 AND last_server=self. - The peer will be prevented from overwriting us: writeSnapshotToDB already has the last_server guard (added in Phase 2) which blocks any future save the peer issues for this player — they see a GUARD log and skip downstream backpack/SS/RS2 writes. - Default 5s is a reasonable trade-off: legitimate slow flushes complete within that window, ghost sessions don't block the player 60s+. - Set to 0 to force-claim immediately (most aggressive, best for proxies). - Set high to restore the legacy behavior (wait full poll length). Also removed the per-tick 'Player X still being saved...' LOGGER.info line that was spamming the Minecraft server log every 500ms during a ghost wait — the SyncLogger.raceCondition entry already captures the same information in the dedicated sync.log and avoids polluting server.log with 120+ lines per join. --- .../fubuki/playersync/config/JdbcConfig.java | 11 +++++++ .../fubuki/playersync/sync/VanillaSync.java | 32 ++++++++++++++++--- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/main/java/vip/fubuki/playersync/config/JdbcConfig.java b/src/main/java/vip/fubuki/playersync/config/JdbcConfig.java index d1b7372..f06e0fa 100644 --- a/src/main/java/vip/fubuki/playersync/config/JdbcConfig.java +++ b/src/main/java/vip/fubuki/playersync/config/JdbcConfig.java @@ -57,6 +57,7 @@ public class JdbcConfig { public static ModConfigSpec.IntValue PEER_STALE_THRESHOLD_SECONDS; public static ModConfigSpec.IntValue JOIN_POLL_MAX_ATTEMPTS; public static ModConfigSpec.IntValue JOIN_POLL_INTERVAL_MS; + public static ModConfigSpec.IntValue JOIN_PEER_ALIVE_MAX_WAIT_SECONDS; public static ModConfigSpec.IntValue POOL_STATS_INTERVAL_MINUTES; public static ModConfigSpec.IntValue HIKARI_POOL_MAX_SIZE; public static ModConfigSpec.IntValue HIKARI_LEAK_THRESHOLD_MS; @@ -174,6 +175,16 @@ public class JdbcConfig { JOIN_POLL_INTERVAL_MS = B.comment( "Wait interval between last_server poll attempts (milliseconds).") .defineInRange("join_poll_interval_ms", 500, 100, 5000); + JOIN_PEER_ALIVE_MAX_WAIT_SECONDS = B.comment( + "When the previous server is ALIVE (heartbeat fresh) but the player row still", + "shows online=1 on it, how long to wait before force-claiming ownership on this", + "server. Prevents the 30-60s 'empty inventory' window when a player active on", + "peer A connects to peer B without cleanly logging out (proxy, network drop,", + "dup session). After this timeout, peer A will simply fail to save this player", + "(blocked by last_server guard) and their next disconnect won't overwrite B's", + "data. Default 5s. Set to 0 to force-claim immediately; set high to restore the", + "legacy behavior of waiting for the peer to flush.") + .defineInRange("join_peer_alive_max_wait_seconds", 5, 0, 600); POOL_STATS_INTERVAL_MINUTES = B.comment( "How often PoolStatsReporter logs executor + Hikari stats. 0 to disable.") .defineInRange("pool_stats_interval_minutes", 5, 0, 1440); diff --git a/src/main/java/vip/fubuki/playersync/sync/VanillaSync.java b/src/main/java/vip/fubuki/playersync/sync/VanillaSync.java index 7bcded3..54fa307 100644 --- a/src/main/java/vip/fubuki/playersync/sync/VanillaSync.java +++ b/src/main/java/vip/fubuki/playersync/sync/VanillaSync.java @@ -390,6 +390,14 @@ public class VanillaSync { final int MAX_POLL = JdbcConfig.JOIN_POLL_MAX_ATTEMPTS.get(); final int POLL_INTERVAL_MS = JdbcConfig.JOIN_POLL_INTERVAL_MS.get(); final long STALE_HEARTBEAT_MS = JdbcConfig.PEER_STALE_THRESHOLD_SECONDS.get() * 1000L; + // PHASE 9: when the peer is alive (heartbeat fresh) but the player row still shows + // online=1 on it — typical of a ghost session (proxy, network drop, or the user + // walking between servers without clean logout) — waiting the full 60s is useless: + // the peer will never flush because the session is technically active there. We + // cap the wait at this shorter window, then force-claim and rely on the last_server + // guard in writeSnapshotToDB to prevent the peer from overwriting us later. + final long PEER_ALIVE_MAX_WAIT_MS = JdbcConfig.JOIN_PEER_ALIVE_MAX_WAIT_SECONDS.get() * 1000L; + final long pollStartTime = System.currentTimeMillis(); for (int attempt = 0; attempt < MAX_POLL; attempt++) { try (JDBCsetUp.QueryResult qrCheck = JDBCsetUp.executePreparedQuery( "SELECT online, last_server FROM " + Tables.playerData() + " WHERE uuid=?", player_uuid)) { @@ -402,20 +410,34 @@ public class VanillaSync { // FIX P1-3: zombie-server short-circuit. server_id=0 is never // a legitimate server (SERVER_ID config generates nextInt(1, MAX-1)). // Absent or stale (>60s) heartbeat => treat as dead, take over. - if (otherServer == 0 || isPeerServerStale(otherServer, STALE_HEARTBEAT_MS)) { + boolean peerDead = (otherServer == 0 || isPeerServerStale(otherServer, STALE_HEARTBEAT_MS)); + if (peerDead) { SyncLogger.raceCondition(player_uuid, "Peer server " + otherServer + " is dead/zombie — taking over after " + attempt + " attempts"); PlayerSync.LOGGER.warn("Player {} last_server={} is dead/zombie, bypassing wait", player_uuid, otherServer); - // Force-clear its online flag so subsequent logic proceeds cleanly. JDBCsetUp.executePreparedUpdate( "UPDATE " + Tables.playerData() + " SET online=0 WHERE uuid=? AND last_server=?", player_uuid, otherServer); break; } - SyncLogger.raceCondition(player_uuid, "Waiting for server " + otherServer + " to finish saving (attempt " + (attempt + 1) + "/" + MAX_POLL + ")"); - PlayerSync.LOGGER.info("Player {} still being saved on server {} (attempt {}/{}), waiting 500ms...", - player_uuid, otherServer, attempt + 1, MAX_POLL); + // PHASE 9: peer ALIVE but session hasn't flushed — enforce short cap. + long waitedMs = System.currentTimeMillis() - pollStartTime; + if (waitedMs >= PEER_ALIVE_MAX_WAIT_MS) { + SyncLogger.raceCondition(player_uuid, + "Peer server " + otherServer + " is alive but player ghost-online for " + + waitedMs + "ms — force-claiming ownership"); + PlayerSync.LOGGER.warn( + "Player {} still online=1 on alive peer server {} after {}ms — force-takeover on this server. Peer's future saves will be blocked by the last_server guard.", + player_uuid, otherServer, waitedMs); + // Force-clear the flag and claim — writeSnapshotToDB will guard + // the peer's next write so they can't overwrite us. + JDBCsetUp.executePreparedUpdate( + "UPDATE " + Tables.playerData() + " SET online=0 WHERE uuid=? AND last_server=?", + player_uuid, otherServer); + break; + } + SyncLogger.raceCondition(player_uuid, "Waiting for server " + otherServer + " to finish saving (attempt " + (attempt + 1) + "/" + MAX_POLL + ", waited=" + waitedMs + "ms)"); Thread.sleep(POLL_INTERVAL_MS); continue; }