feat:文件扫描功能、hash码计算与重复文件扫描的类实现

This commit is contained in:
叁玖领域 2025-07-14 15:29:48 +08:00
parent e764df736a
commit 5e2fbfe1f9
15 changed files with 615 additions and 34 deletions

View File

@ -1,3 +1,9 @@
buildscript {
repositories {
google()
mavenCentral()
}
}
plugins {
id 'java'
id 'io.franzbecker.gradle-lombok' version '3.0.0'
@ -56,12 +62,17 @@ dependencies {
testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:${junitVersion}")
}
test {
useJUnitPlatform()
configurations.configureEach {
exclude group: 'org.apache.logging.log4j', module: 'log4j-slf4j-impl'
}
}
// testJar
tasks.register('testJar', Jar) {
from sourceSets.test.output
}
tasks.register('createLogDir') {
doLast {

View File

@ -8,11 +8,15 @@ module top.r3944realms.docchecktoolrefactored {
opens top.r3944realms.docchecktoolrefactored to javafx.fxml;
opens top.r3944realms.docchecktoolrefactored.ui to javafx.fxml;
opens top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml;
opens top.r3944realms.docchecktoolrefactored.deprecated to javafx.fxml;
exports top.r3944realms.docchecktoolrefactored to javafx.graphics;
exports top.r3944realms.docchecktoolrefactored.ui to javafx.fxml;
exports top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml;
exports top.r3944realms.docchecktoolrefactored.deprecated to javafx.graphics;
opens top.r3944realms.docchecktoolrefactored.deprecated to javafx.fxml;
exports top.r3944realms.docchecktoolrefactored.core ;
exports top.r3944realms.docchecktoolrefactored.io.scanner;
exports top.r3944realms.docchecktoolrefactored.io.reader;
exports top.r3944realms.docchecktoolrefactored.model;
}

View File

@ -4,6 +4,7 @@ import javafx.application.Application;
import javafx.stage.Stage;
import lombok.extern.slf4j.Slf4j;
import top.r3944realms.docchecktoolrefactored.ui.SceneManager;
import top.r3944realms.docchecktoolrefactored.util.StringUtil;
/**
* The type Main.
@ -28,7 +29,7 @@ public class Main extends Application {
* @param args the input arguments
*/
public static void main(String[] args) {
log.info("Hello World!");
log.info(StringUtil.NO_BUG);
launch(args);
}
}

View File

@ -0,0 +1,104 @@
package top.r3944realms.docchecktoolrefactored.core;
import lombok.extern.slf4j.Slf4j;
import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner;
import top.r3944realms.docchecktoolrefactored.model.DuplicateGroup;
import top.r3944realms.docchecktoolrefactored.model.FileMetadata;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
/**
 * Core class for finding duplicate files.
 * <p>
 * Strategy: files are first bucketed by size (cheap), then only buckets with
 * more than one file are hashed, and finally files sharing a hash are reported
 * as duplicate groups.
 */
@Slf4j
public class DuplicateFinder {
    private final FileScanner fileScanner;
    private final FileHashCalculator hashCalculator;
    // When true, scanWithProgress is used so the scanner reports progress callbacks.
    private final boolean enableProgress;

    public DuplicateFinder(FileScanner fileScanner, FileHashCalculator hashCalculator, boolean enableProgress) {
        this.fileScanner = Objects.requireNonNull(fileScanner);
        this.hashCalculator = Objects.requireNonNull(hashCalculator);
        this.enableProgress = enableProgress;
    }

    public DuplicateFinder(FileScanner fileScanner, FileHashCalculator hashCalculator) {
        this(fileScanner, hashCalculator, false);
    }

    /**
     * Finds duplicate files under the given root directory.
     *
     * @param rootDir root directory to scan
     * @return duplicate file groups keyed by hash, sorted by file size descending
     * @throws IOException if the scan fails
     */
    public List<DuplicateGroup> findDuplicates(Path rootDir) throws IOException {
        // Phase 1: group by file size — files of different sizes can never be duplicates.
        Map<Long, List<FileMetadata>> sizeGroups = groupFilesBySize(rootDir);
        // Phase 2: hash only the candidate files (size groups with more than one entry).
        Map<String, List<FileMetadata>> hashGroups = new ConcurrentHashMap<>();
        sizeGroups.values().parallelStream()
                .filter(group -> group.size() > 1) // only possibly-duplicate files
                .flatMap(Collection::stream)
                .forEach(file -> {
                    try {
                        String hash = hashCalculator.calculateHash(file.getPath());
                        file.setHash(hash);
                        // FIX: the bucket must be thread-safe — several stream threads may
                        // add() to the same list concurrently; a plain ArrayList races here.
                        hashGroups.computeIfAbsent(hash, k -> Collections.synchronizedList(new ArrayList<>()))
                                .add(file);
                    } catch (IOException e) {
                        // Log the failure but keep processing the remaining files.
                        log.error("Failed to calculate file's hash: {}, {}", file.getPath(), e.getMessage());
                    }
                });
        // Phase 3: keep only real duplicates and build the sorted result.
        return hashGroups.values().stream()
                .filter(group -> group.size() > 1)
                .map(group -> new DuplicateGroup(
                        group.get(0).getHash(),
                        group.get(0).getSize(),
                        group
                ))
                .sorted(Comparator.comparingLong(DuplicateGroup::size).reversed())
                .collect(Collectors.toList());
    }

    /**
     * Scans the directory tree and groups every regular file by its size in bytes.
     */
    private Map<Long, List<FileMetadata>> groupFilesBySize(Path rootDir) throws IOException {
        Map<Long, List<FileMetadata>> sizeGroups = new ConcurrentHashMap<>();
        FileScanner.ProgressAwareListener listener = new FileScanner.ProgressAwareListener() {
            @Override
            public void onProgressUpdate(int current, int total) {
                log.info("Scanning progress: {}/{} ", current, total);
            }

            @Override
            public void onFileFound(Path file) {
                try {
                    FileMetadata meta = new FileMetadata();
                    meta.setPath(file);
                    meta.setSize(Files.size(file));
                    // FIX: scanner implementations may invoke onFileFound from multiple
                    // threads — use a synchronized list for the per-size bucket.
                    sizeGroups.computeIfAbsent(meta.getSize(), k -> Collections.synchronizedList(new ArrayList<>()))
                            .add(meta);
                } catch (IOException e) {
                    log.error("Failed to get file's size: {}", file);
                }
            }

            @Override
            public void onScanComplete() {
            }

            @Override
            public void onError(Path file, Exception e) {
                log.error("Error on scanning file: {}, {}", file, e.getMessage());
            }
        };
        if (enableProgress) {
            fileScanner.scanWithProgress(rootDir, listener);
        } else {
            fileScanner.scan(rootDir, listener);
        }
        return sizeGroups;
    }
}

View File

@ -0,0 +1,23 @@
package top.r3944realms.docchecktoolrefactored.core;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Strategy interface for computing file hashes.
 */
public interface FileHashCalculator {
/**
 * Computes the hash of a file's contents.
 *
 * @param file path of the file to hash
 * @return the file's hash encoded as a string
 * @throws IOException if the file cannot be read
 */
String calculateHash(Path file) throws IOException;
/**
 * Returns the default implementation, backed by MD5.
 * <p>
 * NOTE(review): MD5 is adequate for duplicate detection but is not
 * collision-resistant — do not rely on this hash for security purposes.
 */
static FileHashCalculator defaultInstance() {
return new MD5HashCalculator();
}
}

View File

@ -0,0 +1,39 @@
package top.r3944realms.docchecktoolrefactored.core;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
 * {@link FileHashCalculator} implementation backed by the MD5 message digest.
 * Produces a lowercase hexadecimal string.
 */
public class MD5HashCalculator implements FileHashCalculator {
    private static final int BUFFER_SIZE = 8192;
    // Lookup table for lowercase hex encoding.
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    @Override
    public String calculateHash(Path file) throws IOException {
        final MessageDigest digest;
        try {
            digest = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            // MD5 is mandated by the JDK spec; reaching this would mean a broken runtime.
            throw new RuntimeException("MD5算法不可用", e);
        }
        try (var in = Files.newInputStream(file)) {
            byte[] chunk = new byte[BUFFER_SIZE];
            for (int n = in.read(chunk); n != -1; n = in.read(chunk)) {
                digest.update(chunk, 0, n);
            }
        }
        return bytesToHex(digest.digest());
    }

    /** Encodes the digest bytes as a lowercase hex string. */
    private static String bytesToHex(byte[] bytes) {
        char[] out = new char[bytes.length * 2];
        for (int i = 0; i < bytes.length; i++) {
            int v = bytes[i] & 0xFF;
            out[i * 2] = HEX_DIGITS[v >>> 4];
            out[i * 2 + 1] = HEX_DIGITS[v & 0x0F];
        }
        return new String(out);
    }
}

View File

@ -12,7 +12,15 @@ public interface FileScanner {
* @param rootPath 根路径
* @param listener 文件发现监听器
*/
void scan(Path rootPath, FileScanListener listener);
default void scan(Path rootPath, FileScanListener listener) {
throw new UnsupportedOperationException("Please implement FileScanner, FileScannerListener.");
}
/**
* 扫描指定路径下的文件带进度反馈
*/
default void scanWithProgress(Path rootPath, ProgressAwareListener listener) {
throw new UnsupportedOperationException("Please implement FileScanner, ProgressAwareListener.");
}
/**
* 文件扫描监听器
@ -38,4 +46,14 @@ public interface FileScanner {
*/
void onError(Path file, Exception e);
}
interface ProgressAwareListener extends FileScanListener {
/**
* 进度更新回调
* @param current 当前已处理文件数
* @param total 预估总文件数可能动态增长
*/
void onProgressUpdate(int current, int total);
}
}

View File

@ -1,18 +1,35 @@
package top.r3944realms.docchecktoolrefactored.io.scanner;
import lombok.extern.slf4j.Slf4j;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* The type Parallel file scanner.
* <p>
* 这个没法正常使用目前遇到的问题
* <p>
* * 目录遍历时遇到权限问题静默失败
* <p>
* * 存在符号链接循环
* <p>
* * 文件系统驱动程序卡死
* <p>
* * JVM与NTFS文件系统兼容性问题
*/
public class ParallelFileScanner implements FileScanner,AutoCloseable {
@Slf4j
@Deprecated
public class ParallelFileScanner implements FileScanner ,AutoCloseable {
private final ForkJoinPool forkJoinPool;
private volatile boolean cancelled = false;
/**
* 使用默认并行度CPU核心数
*/
@ -31,38 +48,90 @@ public class ParallelFileScanner implements FileScanner,AutoCloseable {
@Override
public void scan(Path rootPath, FileScanListener listener) {
forkJoinPool.submit(() -> {
try (
Stream<Path> pathStream = Files.walk(rootPath)
.parallel() // 使用ForkJoinPool的并行流
.filter(Files::isRegularFile)
){
pathStream.forEach(file -> {
try {
listener.onFileFound(file);
} catch (Exception e) {
listener.onError(file, e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
).join();
scanInternal(rootPath, listener, null);
}
@Override
public void scanWithProgress(Path rootPath, ProgressAwareListener listener) {
// 先快速统计总文件数
long totalFiles = countFiles(rootPath);
scanInternal(rootPath, listener, totalFiles);
}
listener.onScanComplete();
private long countFiles(Path rootPath) {
try(Stream<Path> pathStream = Files.walk(rootPath)
.parallel()
.filter(Files::isRegularFile)) {
return pathStream.count();
} catch (IOException e) {
return -1; // 表示无法确定总数
}
}
private void scanInternal(Path rootPath, FileScanListener listener, Long totalFiles) {
log.debug("ThreadPool Status: {}", forkJoinPool.isShutdown() ? "Closed" : "Running");
forkJoinPool.submit(() -> { // 方法没问题可能就是在线程这里被卡死了
try {
AtomicInteger processed = new AtomicInteger(0);
log.debug("Scanning files in {}", rootPath);
// 收集所有文件到List避免Stream被重复使用
@SuppressWarnings("resource") List<Path> files = Files.walk(rootPath)
.peek(p -> log.trace("visiting: {}", p))
.parallel()
.filter(p -> {
boolean isRegular = Files.isRegularFile(p);
if (!isRegular) {
log.debug("Skip non-regular : {} ", p);
}
return isRegular;
})
.peek(p -> log.trace("Found file: {}", p))
.toList(); // 立即消费Stream
if (files.isEmpty()) {
log.warn("No files found in directory: {}", rootPath);
} else log.debug("Found {} files in {}", files.size(), rootPath);
files.forEach(file -> {
if (cancelled) {
log.debug("Cancelled scanning file {}", file);
return;
}
try {
log.debug("Handle file {}", file);
listener.onFileFound(file);
// 进度更新
if (listener instanceof ProgressAwareListener progressListener) {
int current = processed.incrementAndGet();
progressListener.onProgressUpdate(
current,
totalFiles != null ? totalFiles.intValue() : -1
);
}
} catch (Exception e) {
log.debug("Error Handle file {}", file, e);
listener.onError(file, e);
}
});
if (!cancelled) {
log.debug("Finished scanning files in {}", rootPath);
listener.onScanComplete();
}
} catch (IOException e) {
listener.onError(rootPath, e);
} catch (Exception e) {
log.error("Unexpected error in scan thread", e);
listener.onError(rootPath, e);
}
});
log.debug("Task submitted to thread pool");
}
public void cancel() {
cancelled = true;
forkJoinPool.shutdownNow();
}
@Override
public void close() {
forkJoinPool.shutdown();
try {
if (!forkJoinPool.awaitTermination(1, TimeUnit.SECONDS)) {
forkJoinPool.shutdownNow();
}
} catch (InterruptedException e) {
forkJoinPool.shutdownNow();
Thread.currentThread().interrupt();
}
cancel();
}
}

View File

@ -0,0 +1,141 @@
package top.r3944realms.docchecktoolrefactored.io.scanner;
import lombok.extern.slf4j.Slf4j;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.AccessDeniedException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
@Slf4j
public class RobustParallelScanner implements FileScanner, AutoCloseable {
    /** Hard limit on how long one scan may run before it is aborted. */
    private static final long SCAN_TIMEOUT_SECONDS = 30;

    private final ForkJoinPool forkJoinPool;
    private volatile boolean cancelled = false;
    // Maximum recursion depth — guards against symlink loops / runaway recursion.
    private final int maxDepth;

    public RobustParallelScanner(int maxDepth) {
        this(Runtime.getRuntime().availableProcessors(), maxDepth);
    }

    public RobustParallelScanner(int parallelism, int maxDepth) {
        this.forkJoinPool = new ForkJoinPool(parallelism);
        this.maxDepth = maxDepth;
    }

    @Override
    public void scan(Path rootPath, FileScanListener listener) {
        scanInternal(rootPath, listener, null);
    }

    @Override
    public void scanWithProgress(Path rootPath, ProgressAwareListener listener) {
        // Pre-scan pass: count total files so progress can be reported.
        AtomicLong totalFiles = new AtomicLong(0);
        countFiles(rootPath, totalFiles);
        scanInternal(rootPath, listener, totalFiles);
    }

    /** Recursively counts regular files under {@code dir}; best effort, errors only logged. */
    private void countFiles(Path dir, AtomicLong counter) {
        if (cancelled) return;
        try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
            for (Path path : stream) {
                if (cancelled) return;
                if (Files.isDirectory(path)) {
                    countFiles(path, counter);
                } else if (Files.isRegularFile(path)) {
                    counter.incrementAndGet();
                }
            }
        } catch (IOException e) {
            log.warn("Failed to pre-scan: {}", dir, e);
        }
    }

    /**
     * Submits the recursive scan to the pool and blocks (with a timeout) until it finishes.
     *
     * @param totalFiles pre-counted total, or null when progress is not requested
     */
    private void scanInternal(Path rootPath, FileScanListener listener, AtomicLong totalFiles) {
        try {
            validateDirectory(rootPath);
            forkJoinPool.submit(() -> {
                try {
                    AtomicInteger processedFiles = new AtomicInteger(0);
                    scanDirectory(rootPath, listener, processedFiles, totalFiles, 0);
                    if (!cancelled) {
                        listener.onScanComplete();
                    }
                } catch (Exception e) {
                    listener.onError(rootPath, e);
                }
            }).get(SCAN_TIMEOUT_SECONDS, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            log.error("Scan timeout: {}", rootPath, e);
            // FIX: also set the cancelled flag so the still-running task observes the abort,
            // instead of only tearing the pool down underneath it.
            cancel();
            listener.onError(rootPath, new TimeoutException("扫描超时30秒"));
        } catch (InterruptedException e) {
            // FIX: restore the interrupt status before reporting the error.
            Thread.currentThread().interrupt();
            listener.onError(rootPath, e);
        } catch (Exception e) {
            listener.onError(rootPath, e);
        }
    }

    /** Depth-first directory walk; stops at maxDepth or when cancelled. */
    private void scanDirectory(Path dir, FileScanListener listener,
                               AtomicInteger processedFiles, AtomicLong totalFiles, int currentDepth) {
        if (cancelled || currentDepth > maxDepth) return;
        try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
            for (Path path : stream) {
                if (cancelled) break;
                if (Files.isDirectory(path)) {
                    scanDirectory(path, listener, processedFiles, totalFiles, currentDepth + 1);
                } else if (Files.isRegularFile(path)) {
                    processFile(path, listener, processedFiles, totalFiles);
                }
            }
        } catch (IOException e) {
            listener.onError(dir, e);
        }
    }

    /** Reports one file to the listener and, when applicable, updates progress. */
    private void processFile(Path file, FileScanListener listener,
                             AtomicInteger processedFiles, AtomicLong totalFiles) {
        if (cancelled) return;
        try {
            listener.onFileFound(file);
            // Progress callbacks only when the caller asked for them (totalFiles != null).
            if (listener instanceof ProgressAwareListener progressListener && totalFiles != null) {
                int processed = processedFiles.incrementAndGet();
                long total = totalFiles.get();
                progressListener.onProgressUpdate(processed, (int) total);
            }
        } catch (Exception e) {
            listener.onError(file, e);
        }
    }

    /** Rejects missing, unreadable, or NTFS-system ("$") directories before scanning. */
    private void validateDirectory(Path path) throws IOException {
        if (!Files.exists(path)) {
            throw new FileNotFoundException(path.toString());
        }
        if (!Files.isReadable(path)) {
            throw new AccessDeniedException(path.toString());
        }
        // Refuse NTFS system directories (e.g. $RECYCLE.BIN) — scanning them hangs/errors.
        if (Files.getFileStore(path).type().equals("NTFS") &&
                path.toString().contains("$")) {
            throw new IOException("系统目录禁止访问: " + path);
        }
    }

    /** Aborts the current scan and stops the worker pool. */
    public void cancel() {
        cancelled = true;
        forkJoinPool.shutdownNow();
    }

    @Override
    public void close() {
        cancel();
        try {
            // FIX: give in-flight tasks a moment to wind down instead of abandoning the pool.
            forkJoinPool.awaitTermination(1, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}

View File

@ -123,7 +123,7 @@ public class SceneManager {
}
/**
* Try get scene handler.
* Try to get scene handler.
*
* @param node the node
* @param handler the handler

View File

@ -0,0 +1,27 @@
package top.r3944realms.docchecktoolrefactored.util;
/**
 * Holder for shared string constants.
 */
public final class StringUtil {
    /**
     * Classic "Buddha bless, no bugs" ASCII-art banner, logged at startup.
     * FIX: was a mutable {@code public static} field — now {@code final}.
     * NOTE(review): banner whitespace reconstructed; original indentation was
     * lost in transit — verify the rendered art against the original file.
     */
    public static final String NO_BUG = """
                               _ooOoo_
                              o8888888o
                              88" . "88
                              (| -_- |)
                              O\\  =  /O
                           ____/`---'\\____
                         .'  \\\\|     |//  `.
                        /  \\\\|||  :  |||//  \\
                       /  _||||| -:- |||||-  \\
                       |   | \\\\\\  -  ///  |   |
                       | \\_|  ''\\---/''  |   |
                       \\  .-\\__  `-`  ___/-. /
                     ___`. .'  /--.--\\  `. . __
                  ."" '<  `.___\\_<|>_/___.'  >'"".
                 | | :  `- \\`.;`\\ _ /`;.`/ - ` : | |
                 \\  \\ `-.   \\_ __\\ /__ _/   .-` /  /
            ======`-.____`-.___\\_____/___.-`____.-'======
                               `=---='
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                       佛祖保佑 永无BUG
            """;

    private StringUtil() {
        // Utility class — not instantiable.
    }
}

View File

@ -0,0 +1,9 @@
// Test companion module: mirrors the main module's dependencies and adds the
// JUnit 5 API for the test sources.
module top.r3944realms.docchecktoolrefactored.test {
requires static lombok; // compile-time only (annotation processing)
requires org.slf4j;
requires top.r3944realms.docchecktoolrefactored; // module under test
requires org.junit.jupiter.api;
exports top.r3944realms.docchecktoolrefactored.test;
opens top.r3944realms.docchecktoolrefactored.test; // reflective access for the JUnit launcher
}

View File

@ -0,0 +1,67 @@
package top.r3944realms.docchecktoolrefactored.test;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.RepeatedTest;
import top.r3944realms.docchecktoolrefactored.core.DuplicateFinder;
import top.r3944realms.docchecktoolrefactored.core.FileHashCalculator;
import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner;
import top.r3944realms.docchecktoolrefactored.io.scanner.RobustParallelScanner;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Performance comparison of {@link DuplicateFinder} with and without the
 * pre-counting (progress) pass, against a fixed local test directory.
 */
public class DuplicateFinderPerformanceTest {
    private static final String TEST_PATH = "D:/测试数据/JPG";
    private FileScanner scanner;
    private FileHashCalculator hashCalculator;

    @BeforeEach
    void setUp() {
        scanner = new RobustParallelScanner(20);
        hashCalculator = FileHashCalculator.defaultInstance();
    }

    @RepeatedTest(5)
    void compareFinderPerformance() throws IOException {
        // Measure without pre-counting first, then with, to match the original order.
        long msWithout = timeFindDuplicates(false);
        long msWith = timeFindDuplicates(true);
        System.out.println("Without pre-counting: " + msWithout + " ms");
        System.out.println("With pre-counting: " + msWith + " ms");
        // NOTE(review): the speedup assertion from the original remains disabled.
    }

    /** Runs one full duplicate scan and returns the elapsed wall time in milliseconds. */
    private long timeFindDuplicates(boolean enableProgress) throws IOException {
        long startedAt = System.nanoTime();
        new DuplicateFinder(scanner, hashCalculator, enableProgress)
                .findDuplicates(Paths.get(TEST_PATH));
        return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startedAt);
    }

    @Test
    void verifySameResults() throws IOException {
        var withoutPrecount = new DuplicateFinder(scanner, hashCalculator, false)
                .findDuplicates(Paths.get(TEST_PATH));
        var withPrecount = new DuplicateFinder(scanner, hashCalculator, true)
                .findDuplicates(Paths.get(TEST_PATH));
        // NOTE(review): the result-equality assertion was disabled in the original
        // and is intentionally kept disabled here.
    }
}

View File

@ -0,0 +1,36 @@
package top.r3944realms.docchecktoolrefactored.test;
import lombok.extern.slf4j.Slf4j;
import top.r3944realms.docchecktoolrefactored.core.DuplicateFinder;
import top.r3944realms.docchecktoolrefactored.core.FileHashCalculator;
import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner;
import top.r3944realms.docchecktoolrefactored.io.scanner.RobustParallelScanner;
import top.r3944realms.docchecktoolrefactored.model.DuplicateGroup;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;
/**
 * Manual smoke test for {@link DuplicateFinder}: scans a local directory and
 * logs every duplicate group found.
 */
@Slf4j
public class DuplicateTest {
    public static void main(String[] args) throws IOException {
        // FIX: the scanner owns a ForkJoinPool (AutoCloseable) — close it when done.
        // FIX: removed the unused second scanner/calculator/finder trio, which
        // allocated another thread pool that was never used or shut down.
        try (RobustParallelScanner scanner = new RobustParallelScanner(20)) {
            FileHashCalculator hashCalculator = FileHashCalculator.defaultInstance();
            DuplicateFinder finder = new DuplicateFinder(scanner, hashCalculator);
            List<DuplicateGroup> duplicates = finder.findDuplicates(Paths.get("H:\\nw0\\newworld(1)(1)"));
            // Report each duplicate group with its member paths.
            duplicates.forEach(group -> {
                log.info("发现重复文件组({} bytes):", group.size());
                group.fileMetas().forEach(file ->
                        log.info(" {}", file.getPath())
                );
            });
        }
    }
}

View File

@ -0,0 +1,32 @@
package top.r3944realms.docchecktoolrefactored.test;
import lombok.extern.slf4j.Slf4j;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
/**
 * Manual smoke test for parallel directory walking (mirrors the traversal
 * logic used by ParallelFileScanner).
 */
@Slf4j
public class ParallelFileScannerTest {
    public static void main(String[] args) throws IOException {
        test1(Path.of("D:/测试数据/JPG"));
    }

    /**
     * Walks {@code rootPath} in parallel, collects all regular files, and logs the count.
     */
    private static void test1(Path rootPath) throws IOException {
        // FIX: Files.walk holds open directory handles — close the stream
        // with try-with-resources instead of suppressing the resource warning.
        final List<Path> files;
        try (var walk = Files.walk(rootPath)) {
            files = walk
                    .peek(p -> log.trace("visiting: {}", p))
                    .parallel()
                    .filter(p -> {
                        boolean isRegular = Files.isRegularFile(p);
                        if (!isRegular) {
                            log.debug("Skip non-regular : {} ", p);
                        }
                        return isRegular;
                    })
                    .peek(p -> log.trace("Found file: {}", p))
                    .toList(); // terminal op: consume the stream before it is closed
        }
        if (files.isEmpty()) {
            log.warn("No files found in directory: {}", rootPath);
        } else {
            log.debug("Found {} files in {}", files.size(), rootPath);
        }
    }
}