diff --git a/src/main/java/vip/fubuki/playersync/PlayerSync.java b/src/main/java/vip/fubuki/playersync/PlayerSync.java index b2a2208..9479ad0 100644 --- a/src/main/java/vip/fubuki/playersync/PlayerSync.java +++ b/src/main/java/vip/fubuki/playersync/PlayerSync.java @@ -207,6 +207,18 @@ public class PlayerSync { } catch (Exception e) { LOGGER.error("An exception occurred while trying change wrong player-status\n" + e.getMessage()); } + + // Phase 3: anti-loss infrastructure. + // 1. Clear orphaned online=1 flags from previous unclean shutdown. + // 2. Report zombie peer servers so admins see them in logs. + // 3. Install JVM shutdown hook — covers kill -9 / OOM / host reboot. + // 4. Start periodic heartbeat so peers can detect us as alive. + vip.fubuki.playersync.util.CrashRecovery.clearOrphanedOnlineFlags(); + vip.fubuki.playersync.util.CrashRecovery.reportZombiePeers(60_000L); + vip.fubuki.playersync.util.CrashRecovery.installShutdownHook(() -> + vip.fubuki.playersync.sync.VanillaSync.emergencyFlushAll()); + vip.fubuki.playersync.util.HeartbeatService.start(); + LOGGER.info("PlayerSync is ready!"); } diff --git a/src/main/java/vip/fubuki/playersync/sync/VanillaSync.java b/src/main/java/vip/fubuki/playersync/sync/VanillaSync.java index 0ab6d17..9bad5ac 100644 --- a/src/main/java/vip/fubuki/playersync/sync/VanillaSync.java +++ b/src/main/java/vip/fubuki/playersync/sync/VanillaSync.java @@ -1067,6 +1067,9 @@ public class VanillaSync { } JDBCsetUp.executePreparedUpdate("UPDATE " + Tables.serverInfo() + " SET enable=0 WHERE id=?", JdbcConfig.SERVER_ID.get()); + // Phase 3: stop heartbeat before pool shutdown so its tick doesn't race with pool close. + vip.fubuki.playersync.util.HeartbeatService.stop(); + // Shut down the background executor — no new tasks after this point executorService.shutdown(); try { @@ -1087,6 +1090,57 @@ public class VanillaSync { vip.fubuki.playersync.util.SyncLogger.shutdown(); } + /** + * Phase 3 emergency flush invoked from the JVM shutdown hook (kill -9, OOM, host + * reboot) when {@code onServerShutdown} never ran. Runs on the JVM shutdown thread, + * synchronously, WITHOUT the executor (which may be already draining or dead). + * + *
Best-effort: snapshots and writes every still-online player using direct
+ * DB calls. No lock acquisition — the server is dying, we just want data on disk.
+ * If the DB pool is already closed, we log and exit gracefully.
+ */
+ public static void emergencyFlushAll() {
+ try {
+ MinecraftServer server = ServerLifecycleHooks.getCurrentServer();
+ if (server == null) {
+ PlayerSync.LOGGER.warn("[emergency-flush] no server instance — nothing to flush");
+ return;
+ }
+ int flushed = 0;
+ for (ServerPlayer player : server.getPlayerList().getPlayers()) {
+ String puuid = player.getUUID().toString();
+ if (!player.getTags().contains("player_synced") || player.isDeadOrDying()) continue;
+ try {
+ final PlayerDataSnapshot snapshot = snapshotPlayerData(player);
+ final Map Installs a JVM shutdown hook that flushes pending saves and writes a
+ * graceful-shutdown marker into {@code server_info}. On next startup, scans
+ * {@code player_data} for rows stuck at {@code online=1} on this server and
+ * clears them — covers {@code kill -9} / OOM / JVM abort scenarios where the
+ * normal ServerStoppingEvent path never ran.
+ *
+ * Companion of {@link HeartbeatService} which keeps {@code server_info.last_update}
+ * fresh so peer servers can detect this one as alive.
+ *
+ * @author vyrriox
+ */
+public final class CrashRecovery {
+
+ private CrashRecovery() {}
+
+ private static final AtomicBoolean HOOK_INSTALLED = new AtomicBoolean(false);
+ private static volatile Runnable flushCallback;
+
+ /**
+ * Registers a JVM shutdown hook. Called once from PlayerSync.onServerStarting
+ * AFTER the DB pool is up. The {@code flushTask} is invoked on JVM shutdown —
+ * use it to snapshot all still-online players synchronously (no async executor,
+ * the pool may already be draining).
+ */
+ public static void installShutdownHook(Runnable flushTask) {
+ if (!HOOK_INSTALLED.compareAndSet(false, true)) return;
+ flushCallback = flushTask;
+
+ Thread hook = new Thread(() -> {
+ try {
+ PlayerSync.LOGGER.warn("[crash-recovery] JVM shutdown hook fired — flushing pending saves");
+ SyncLogger.playerEvent("SYSTEM", "JVM_SHUTDOWN_HOOK", "Flushing pending saves before JVM exit");
+ if (flushCallback != null) {
+ try {
+ flushCallback.run();
+ } catch (Throwable t) {
+ PlayerSync.LOGGER.error("[crash-recovery] flush callback threw", t);
+ }
+ }
+ // Mark this server as gracefully stopped so peers know it's dead.
+ try {
+ JDBCsetUp.executePreparedUpdate(
+ "UPDATE " + Tables.serverInfo() + " SET enable=0, last_update=? WHERE id=?",
+ System.currentTimeMillis(), JdbcConfig.SERVER_ID.get());
+ } catch (Exception e) {
+ PlayerSync.LOGGER.warn("[crash-recovery] could not mark server stopped: {}", e.getMessage());
+ }
+ } catch (Throwable t) {
+ // NEVER let the hook throw — it would block JVM exit.
+ PlayerSync.LOGGER.error("[crash-recovery] hook failed", t);
+ }
+ }, "PlayerSync-shutdown-hook");
+ hook.setDaemon(false); // MUST be non-daemon: daemon threads are killed on exit
+ Runtime.getRuntime().addShutdownHook(hook);
+ PlayerSync.LOGGER.info("[crash-recovery] JVM shutdown hook installed");
+ }
+
+ /**
+ * Scans {@code player_data} for orphaned online=1 rows on this server and
+ * clears them. Called from PlayerSync.onServerStarting AFTER the tables are
+ * created. This is the recovery path for players who were online when the
+ * server was killed ungracefully (kill -9, OOM, host reboot).
+ */
+ public static void clearOrphanedOnlineFlags() {
+ int serverId = JdbcConfig.SERVER_ID.get();
+ try {
+ // Count first so we know what we're about to clear.
+ int count = 0;
+ try (JDBCsetUp.QueryResult qr = JDBCsetUp.executePreparedQuery(
+ "SELECT COUNT(*) AS c FROM " + Tables.playerData() + " WHERE last_server=? AND online=1",
+ serverId)) {
+ ResultSet rs = qr.resultSet();
+ if (rs.next()) count = rs.getInt("c");
+ }
+ JDBCsetUp.executePreparedUpdate(
+ "UPDATE " + Tables.playerData() + " SET online=0 WHERE last_server=? AND online=1",
+ serverId);
+ if (count > 0) {
+ PlayerSync.LOGGER.warn("[crash-recovery] cleared {} orphan online=1 rows from previous session (server_id={})",
+ count, serverId);
+ SyncLogger.playerEvent("SYSTEM", "ORPHAN_CLEAR",
+ "Cleared " + count + " online=1 rows left by previous session crash");
+ } else {
+ PlayerSync.LOGGER.info("[crash-recovery] no orphan online=1 rows found — previous shutdown was clean");
+ }
+ } catch (Exception e) {
+ PlayerSync.LOGGER.error("[crash-recovery] failed to scan for orphans", e);
+ }
+ }
+
+ /**
+ * Reports peer servers whose heartbeat is stale. Informational — useful to
+ * surface zombie server_ids that could trip doPlayerJoin's poll. Called once
+ * on startup.
+ */
+ public static void reportZombiePeers(long staleAfterMs) {
+ try (JDBCsetUp.QueryResult qr = JDBCsetUp.executePreparedQuery(
+ "SELECT id, last_update FROM " + Tables.serverInfo() + " WHERE enable=1 AND id<>?",
+ JdbcConfig.SERVER_ID.get())) {
+ ResultSet rs = qr.resultSet();
+ long now = System.currentTimeMillis();
+ int zombies = 0;
+ while (rs.next()) {
+ int id = rs.getInt("id");
+ long last = rs.getLong("last_update");
+ long age = now - last;
+ if (id == 0 || age > staleAfterMs) {
+ zombies++;
+ PlayerSync.LOGGER.warn("[crash-recovery] peer server_id={} is zombie (last_update age={}ms, enabled=true)",
+ id, age);
+ }
+ }
+ if (zombies > 0) {
+ SyncLogger.playerEvent("SYSTEM", "ZOMBIE_PEERS", zombies + " peer server(s) appear stale");
+ }
+ } catch (Exception e) {
+ PlayerSync.LOGGER.warn("[crash-recovery] zombie peer scan failed: {}", e.getMessage());
+ }
+ }
+}
diff --git a/src/main/java/vip/fubuki/playersync/util/HeartbeatService.java b/src/main/java/vip/fubuki/playersync/util/HeartbeatService.java
new file mode 100644
index 0000000..d23caa6
--- /dev/null
+++ b/src/main/java/vip/fubuki/playersync/util/HeartbeatService.java
@@ -0,0 +1,65 @@
+package vip.fubuki.playersync.util;
+
+import vip.fubuki.playersync.PlayerSync;
+import vip.fubuki.playersync.config.JdbcConfig;
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Periodic {@code server_info.last_update} heartbeat.
+ *
+ * Runs on a dedicated single-threaded scheduler at a fixed interval so peer
+ * servers can detect this server as alive via {@code isPeerServerStale()} in
+ * {@code VanillaSync.doPlayerJoin}. Without this, a server that stops issuing
+ * updates (e.g. hung main thread) would be treated as alive indefinitely by
+ * rejoining players on other servers, causing the 30s poll timeouts seen in
+ * production logs.
+ *
+ * @author vyrriox
+ */
+public final class HeartbeatService {
+
+ private HeartbeatService() {}
+
+ /** Heartbeat period: 10s. Short enough that a 60s staleness threshold catches real outages. */
+ private static final long PERIOD_MS = 10_000L;
+
+ private static final AtomicBoolean RUNNING = new AtomicBoolean(false);
+ private static ScheduledExecutorService scheduler;
+
+ public static void start() {
+ if (!RUNNING.compareAndSet(false, true)) return;
+ scheduler = Executors.newSingleThreadScheduledExecutor(r -> {
+ Thread t = new Thread(r, "PlayerSync-heartbeat");
+ t.setDaemon(true);
+ t.setPriority(Thread.MIN_PRIORITY);
+ return t;
+ });
+ scheduler.scheduleAtFixedRate(HeartbeatService::tick, PERIOD_MS, PERIOD_MS, TimeUnit.MILLISECONDS);
+ PlayerSync.LOGGER.info("[heartbeat] started (period={}ms, server_id={})", PERIOD_MS, JdbcConfig.SERVER_ID.get());
+ }
+
+ public static void stop() {
+ if (!RUNNING.compareAndSet(true, false)) return;
+ if (scheduler != null) {
+ scheduler.shutdownNow();
+ scheduler = null;
+ }
+ PlayerSync.LOGGER.info("[heartbeat] stopped");
+ }
+
+ private static void tick() {
+ try {
+ int serverId = JdbcConfig.SERVER_ID.get();
+ JDBCsetUp.executePreparedUpdate(
+ "UPDATE " + Tables.serverInfo() + " SET last_update=?, enable=1 WHERE id=?",
+ System.currentTimeMillis(), serverId);
+ } catch (Throwable t) {
+ // Do not kill the scheduler on a transient DB error — log and retry next tick.
+ PlayerSync.LOGGER.warn("[heartbeat] tick failed: {}", t.getMessage());
+ }
+ }
+}