From 5e2fbfe1f91821f47c221458b0bfb8105c1f3c62 Mon Sep 17 00:00:00 2001 From: 3944Realms Date: Mon, 14 Jul 2025 15:29:48 +0800 Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9A=E6=96=87=E4=BB=B6=E6=89=AB?= =?UTF-8?q?=E6=8F=8F=E5=8A=9F=E8=83=BD=E3=80=81hash=E7=A0=81=E8=AE=A1?= =?UTF-8?q?=E7=AE=97=E4=B8=8E=E9=87=8D=E5=A4=8D=E6=96=87=E4=BB=B6=E6=89=AB?= =?UTF-8?q?=E6=8F=8F=E7=9A=84=E7=B1=BB=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle | 11 ++ src/main/java/module-info.java | 6 +- .../docchecktoolrefactored/Main.java | 3 +- .../core/DuplicateFinder.java | 104 +++++++++++++ .../core/FileHashCalculator.java | 23 +++ .../core/MD5HashCalculator.java | 39 +++++ .../io/scanner/FileScanner.java | 20 ++- .../io/scanner/ParallelFileScanner.java | 129 ++++++++++++---- .../io/scanner/RobustParallelScanner.java | 141 ++++++++++++++++++ .../ui/SceneManager.java | 2 +- .../util/StringUtil.java | 27 ++++ src/test/java/module-info.java | 9 ++ .../test/DuplicateFinderPerformanceTest.java | 67 +++++++++ .../test/DuplicateTest.java | 36 +++++ .../test/ParallelFileScannerTest.java | 32 ++++ 15 files changed, 615 insertions(+), 34 deletions(-) create mode 100644 src/main/java/top/r3944realms/docchecktoolrefactored/core/DuplicateFinder.java create mode 100644 src/main/java/top/r3944realms/docchecktoolrefactored/core/FileHashCalculator.java create mode 100644 src/main/java/top/r3944realms/docchecktoolrefactored/core/MD5HashCalculator.java create mode 100644 src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/RobustParallelScanner.java create mode 100644 src/main/java/top/r3944realms/docchecktoolrefactored/util/StringUtil.java create mode 100644 src/test/java/module-info.java create mode 100644 src/test/java/top/r3944realms/docchecktoolrefactored/test/DuplicateFinderPerformanceTest.java create mode 100644 src/test/java/top/r3944realms/docchecktoolrefactored/test/DuplicateTest.java create mode 100644 src/test/java/top/r3944realms/docchecktoolrefactored/test/ParallelFileScannerTest.java diff --git a/build.gradle b/build.gradle index e711904..78158f5 100644 --- a/build.gradle +++ b/build.gradle @@ -1,3 +1,9 @@ +buildscript { + repositories { + google() + mavenCentral() + } +} plugins { id 'java' id 'io.franzbecker.gradle-lombok' version '3.0.0' @@ -56,12 +62,17 @@ dependencies { testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:${junitVersion}") } + test { useJUnitPlatform() configurations.configureEach { exclude group: 'org.apache.logging.log4j', module: 'log4j-slf4j-impl' } } +// 可选:添加 testJar 任务 +tasks.register('testJar', Jar) { + from sourceSets.test.output +} tasks.register('createLogDir') { doLast { diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java index 3fe0648..2db4a61 100644 --- a/src/main/java/module-info.java +++ b/src/main/java/module-info.java @@ -8,11 +8,15 @@ module top.r3944realms.docchecktoolrefactored { opens top.r3944realms.docchecktoolrefactored to javafx.fxml; opens top.r3944realms.docchecktoolrefactored.ui to javafx.fxml; opens top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml; + opens top.r3944realms.docchecktoolrefactored.deprecated to javafx.fxml; exports top.r3944realms.docchecktoolrefactored to javafx.graphics; exports top.r3944realms.docchecktoolrefactored.ui to javafx.fxml; exports top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml; exports top.r3944realms.docchecktoolrefactored.deprecated to javafx.graphics; - opens top.r3944realms.docchecktoolrefactored.deprecated to javafx.fxml; + exports top.r3944realms.docchecktoolrefactored.core ; + exports top.r3944realms.docchecktoolrefactored.io.scanner; + exports top.r3944realms.docchecktoolrefactored.io.reader; + exports top.r3944realms.docchecktoolrefactored.model; } \ No newline at end of file diff --git a/src/main/java/top/r3944realms/docchecktoolrefactored/Main.java b/src/main/java/top/r3944realms/docchecktoolrefactored/Main.java index af07374..6cd07e9 100644 --- a/src/main/java/top/r3944realms/docchecktoolrefactored/Main.java +++ b/src/main/java/top/r3944realms/docchecktoolrefactored/Main.java @@ -4,6 +4,7 @@ import javafx.application.Application; import javafx.stage.Stage; import lombok.extern.slf4j.Slf4j; import top.r3944realms.docchecktoolrefactored.ui.SceneManager; +import top.r3944realms.docchecktoolrefactored.util.StringUtil; /** * The type Main. @@ -28,7 +29,7 @@ public class Main extends Application { * @param args the input arguments */ public static void main(String[] args) { - log.info("Hello World!"); + log.info(StringUtil.NO_BUG); launch(args); } } \ No newline at end of file diff --git a/src/main/java/top/r3944realms/docchecktoolrefactored/core/DuplicateFinder.java b/src/main/java/top/r3944realms/docchecktoolrefactored/core/DuplicateFinder.java new file mode 100644 index 0000000..13bbc87 --- /dev/null +++ b/src/main/java/top/r3944realms/docchecktoolrefactored/core/DuplicateFinder.java @@ -0,0 +1,104 @@ +package top.r3944realms.docchecktoolrefactored.core; + +import lombok.extern.slf4j.Slf4j; +import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner; +import top.r3944realms.docchecktoolrefactored.model.DuplicateGroup; +import top.r3944realms.docchecktoolrefactored.model.FileMetadata; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + + +/** + * 重复文件查找核心类 + */ +@Slf4j +public class DuplicateFinder { + private final FileScanner fileScanner; + private final FileHashCalculator hashCalculator; + private final boolean enableProgress; + public DuplicateFinder(FileScanner fileScanner, FileHashCalculator hashCalculator, boolean enableProgress) { + this.fileScanner = Objects.requireNonNull(fileScanner); + this.hashCalculator = Objects.requireNonNull(hashCalculator); + this.enableProgress = enableProgress; + } + public DuplicateFinder(FileScanner fileScanner, FileHashCalculator hashCalculator) { + this(fileScanner, hashCalculator, false); + } + + /** + * 查找重复文件 + * @param rootDir 要扫描的根目录 + * @return 按哈希值分组的重复文件列表 + */ + public List findDuplicates(Path rootDir) throws IOException { + // 第一阶段:按文件大小分组 + Map> sizeGroups = groupFilesBySize(rootDir); + + // 第二阶段:对可能重复的文件计算哈希 + Map> hashGroups = new ConcurrentHashMap<>(); + + sizeGroups.values().parallelStream() + .filter(group -> group.size() > 1) // 只处理可能重复的文件 + .forEach(group -> group.parallelStream().forEach(file -> { + try { + String hash = hashCalculator.calculateHash(file.getPath()); + file.setHash(hash); + hashGroups.computeIfAbsent(hash, k -> new ArrayList<>()).add(file); + } catch (IOException e) { + // 记录错误但继续处理其他文件 + log.error("Failed to calculate file's hash: {}, {}", file.getPath(), e.getMessage()); + } + })); + + // 第三阶段:构建结果 + return hashGroups.values().stream() + .filter(group -> group.size() > 1) + .map(group -> new DuplicateGroup( + group.get(0).getHash(), + group.get(0).getSize(), + group + )) + .sorted(Comparator.comparingLong(DuplicateGroup::size).reversed()) + .collect(Collectors.toList()); + } + + /** + * 按文件大小分组 + */ + private Map> groupFilesBySize(Path rootDir) throws IOException { + Map> sizeGroups = new ConcurrentHashMap<>(); + FileScanner.ProgressAwareListener listener = new FileScanner.ProgressAwareListener() { + @Override + public void onProgressUpdate(int current, int total) { + log.info("Scanning progress: {}/{} ", current, total); + } + + @Override + public void onFileFound(Path file) { + try { + FileMetadata meta = new FileMetadata(); + meta.setPath(file); + meta.setSize(Files.size(file)); + sizeGroups.computeIfAbsent(meta.getSize(), k -> new ArrayList<>()).add(meta); + } catch (IOException e) { + log.error("Failed to get file's size: {}", file); + } + } + + @Override public void onScanComplete() {} + @Override public void onError(Path file, Exception e) { + log.error("Error on scanning file: {}, {}", file, e.getMessage()); + } + }; + if(enableProgress) + fileScanner.scanWithProgress(rootDir, listener); + else + fileScanner.scan(rootDir, listener); + return sizeGroups; + } +} \ No newline at end of file diff --git a/src/main/java/top/r3944realms/docchecktoolrefactored/core/FileHashCalculator.java b/src/main/java/top/r3944realms/docchecktoolrefactored/core/FileHashCalculator.java new file mode 100644 index 0000000..b986ff2 --- /dev/null +++ b/src/main/java/top/r3944realms/docchecktoolrefactored/core/FileHashCalculator.java @@ -0,0 +1,23 @@ +package top.r3944realms.docchecktoolrefactored.core; + +import java.io.IOException; +import java.nio.file.Path; + +/** + * 文件哈希计算策略接口 + */ +public interface FileHashCalculator { + /** + * 计算文件哈希值 + * @param file 要计算的文件路径 + * @return 文件的哈希值字符串 + */ + String calculateHash(Path file) throws IOException; + + /** + * 默认实现使用MD5 + */ + static FileHashCalculator defaultInstance() { + return new MD5HashCalculator(); + } +} diff --git a/src/main/java/top/r3944realms/docchecktoolrefactored/core/MD5HashCalculator.java b/src/main/java/top/r3944realms/docchecktoolrefactored/core/MD5HashCalculator.java new file mode 100644 index 0000000..70fb98f --- /dev/null +++ b/src/main/java/top/r3944realms/docchecktoolrefactored/core/MD5HashCalculator.java @@ -0,0 +1,39 @@ +package top.r3944realms.docchecktoolrefactored.core; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +/** + * MD5哈希计算实现 + */ +public class MD5HashCalculator implements FileHashCalculator { + private static final int BUFFER_SIZE = 8192; + + @Override + public String calculateHash(Path file) throws IOException { + try { + MessageDigest md = MessageDigest.getInstance("MD5"); + try (var is = Files.newInputStream(file)) { + byte[] buffer = new byte[BUFFER_SIZE]; + int bytesRead; + while ((bytesRead = is.read(buffer)) != -1) { + md.update(buffer, 0, bytesRead); + } + } + return bytesToHex(md.digest()); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException("MD5算法不可用", e); + } + } + + private static String bytesToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } +} \ No newline at end of file diff --git a/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/FileScanner.java b/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/FileScanner.java index 39f19df..4c08da6 100644 --- a/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/FileScanner.java +++ b/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/FileScanner.java @@ -12,7 +12,15 @@ public interface FileScanner { * @param rootPath 根路径 * @param listener 文件发现监听器 */ - void scan(Path rootPath, FileScanListener listener); + default void scan(Path rootPath, FileScanListener listener) { + throw new UnsupportedOperationException("Please implement FileScanner, FileScannerListener."); + } + /** + * 扫描指定路径下的文件(带进度反馈) + */ + default void scanWithProgress(Path rootPath, ProgressAwareListener listener) { + throw new UnsupportedOperationException("Please implement FileScanner, ProgressAwareListener."); + } /** * 文件扫描监听器 @@ -38,4 +46,14 @@ public interface FileScanner { */ void onError(Path file, Exception e); } + + + interface ProgressAwareListener extends FileScanListener { + /** + * 进度更新回调 + * @param current 当前已处理文件数 + * @param total 预估总文件数(可能动态增长) + */ + void onProgressUpdate(int current, int total); + } } diff --git a/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/ParallelFileScanner.java b/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/ParallelFileScanner.java index b5f9d7b..57990b4 100644 --- a/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/ParallelFileScanner.java +++ b/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/ParallelFileScanner.java @@ -1,18 +1,35 @@ package top.r3944realms.docchecktoolrefactored.io.scanner; +import lombok.extern.slf4j.Slf4j; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.List; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import java.util.stream.Stream; /** * The type Parallel file scanner. + *

+ * 这个没法正常使用,目前遇到的问题 + *

+ * * 目录遍历时遇到权限问题(静默失败) + *

+ * * 存在符号链接循环 + *

+ * * 文件系统驱动程序卡死 + *

+ * * JVM与NTFS文件系统兼容性问题 */ -public class ParallelFileScanner implements FileScanner,AutoCloseable { +@Slf4j +@Deprecated +public class ParallelFileScanner implements FileScanner ,AutoCloseable { private final ForkJoinPool forkJoinPool; - + private volatile boolean cancelled = false; /** * 使用默认并行度(CPU核心数) */ @@ -31,38 +48,90 @@ public class ParallelFileScanner implements FileScanner,AutoCloseable { @Override public void scan(Path rootPath, FileScanListener listener) { - forkJoinPool.submit(() -> { - try ( - Stream pathStream = Files.walk(rootPath) - .parallel() // 使用ForkJoinPool的并行流 - .filter(Files::isRegularFile) - ){ - pathStream.forEach(file -> { - try { - listener.onFileFound(file); - } catch (Exception e) { - listener.onError(file, e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - ).join(); + scanInternal(rootPath, listener, null); + } + @Override + public void scanWithProgress(Path rootPath, ProgressAwareListener listener) { + // 先快速统计总文件数 + long totalFiles = countFiles(rootPath); + scanInternal(rootPath, listener, totalFiles); + } - listener.onScanComplete(); + private long countFiles(Path rootPath) { + try(Stream pathStream = Files.walk(rootPath) + .parallel() + .filter(Files::isRegularFile)) { + return pathStream.count(); + } catch (IOException e) { + return -1; // 表示无法确定总数 + } + } + private void scanInternal(Path rootPath, FileScanListener listener, Long totalFiles) { + log.debug("ThreadPool Status: {}", forkJoinPool.isShutdown() ? "Closed" : "Running"); + forkJoinPool.submit(() -> { // 方法没问题,可能就是在线程这里被卡死了 + try { + AtomicInteger processed = new AtomicInteger(0); + log.debug("Scanning files in {}", rootPath); + // 收集所有文件到List(避免Stream被重复使用) + @SuppressWarnings("resource") List files = Files.walk(rootPath) + .peek(p -> log.trace("visiting: {}", p)) + .parallel() + .filter(p -> { + boolean isRegular = Files.isRegularFile(p); + if (!isRegular) { + log.debug("Skip non-regular : {} ", p); + } + return isRegular; + }) + .peek(p -> log.trace("Found file: {}", p)) + .toList(); // 立即消费Stream + if (files.isEmpty()) { + log.warn("No files found in directory: {}", rootPath); + } else log.debug("Found {} files in {}", files.size(), rootPath); + files.forEach(file -> { + if (cancelled) { + log.debug("Cancelled scanning file {}", file); + return; + } + + try { + log.debug("Handle file {}", file); + listener.onFileFound(file); + + // 进度更新 + if (listener instanceof ProgressAwareListener progressListener) { + int current = processed.incrementAndGet(); + progressListener.onProgressUpdate( + current, + totalFiles != null ? totalFiles.intValue() : -1 + ); + } + } catch (Exception e) { + log.debug("Error Handle file {}", file, e); + listener.onError(file, e); + } + }); + if (!cancelled) { + log.debug("Finished scanning files in {}", rootPath); + listener.onScanComplete(); + } + } catch (IOException e) { + listener.onError(rootPath, e); + } catch (Exception e) { + log.error("Unexpected error in scan thread", e); + listener.onError(rootPath, e); + } + }); + log.debug("Task submitted to thread pool"); + } + + public void cancel() { + cancelled = true; + forkJoinPool.shutdownNow(); } @Override public void close() { - forkJoinPool.shutdown(); - try { - if (!forkJoinPool.awaitTermination(1, TimeUnit.SECONDS)) { - forkJoinPool.shutdownNow(); - } - } catch (InterruptedException e) { - forkJoinPool.shutdownNow(); - Thread.currentThread().interrupt(); - } + cancel(); } } \ No newline at end of file diff --git a/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/RobustParallelScanner.java b/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/RobustParallelScanner.java new file mode 100644 index 0000000..4e6882c --- /dev/null +++ b/src/main/java/top/r3944realms/docchecktoolrefactored/io/scanner/RobustParallelScanner.java @@ -0,0 +1,141 @@ +package top.r3944realms.docchecktoolrefactored.io.scanner; + +import lombok.extern.slf4j.Slf4j; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.AccessDeniedException; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +@Slf4j +public class RobustParallelScanner implements FileScanner, AutoCloseable { + private final ForkJoinPool forkJoinPool; + private volatile boolean cancelled = false; + private final int maxDepth; + public RobustParallelScanner(int maxDepth) { + this(Runtime.getRuntime().availableProcessors(), maxDepth); + } + public RobustParallelScanner(int parallelism, int maxDepth) { + this.forkJoinPool = new ForkJoinPool(parallelism); + this.maxDepth = maxDepth; // 防止无限递归 + } + + @Override + public void scan(Path rootPath, FileScanListener listener) { + scanInternal(rootPath, listener, null); + } + @Override + public void scanWithProgress(Path rootPath, ProgressAwareListener listener) { + // 预扫描阶段:计算总文件数 + AtomicLong totalFiles = new AtomicLong(0); + countFiles(rootPath, totalFiles); + scanInternal(rootPath, listener, totalFiles); + } + private void countFiles(Path dir, AtomicLong counter) { + if (cancelled) return; + + try (DirectoryStream stream = Files.newDirectoryStream(dir)) { + for (Path path : stream) { + if (cancelled) return; + + if (Files.isDirectory(path)) { + countFiles(path, counter); + } else if (Files.isRegularFile(path)) { + counter.incrementAndGet(); + } + } + } catch (IOException e) { + log.warn("Failed to pre-scan: {}", dir, e); + } + } + private void scanInternal(Path rootPath, FileScanListener listener, AtomicLong totalFiles) { + try { + validateDirectory(rootPath); + + forkJoinPool.submit(() -> { + try { + AtomicInteger processedFiles = new AtomicInteger(0); + scanDirectory(rootPath, listener, processedFiles, totalFiles, 0); + + if (!cancelled) { + listener.onScanComplete(); + } + } catch (Exception e) { + listener.onError(rootPath, e); + } + }).get(30, TimeUnit.SECONDS); + } catch (TimeoutException e) { + log.error("Scan timeout: {}", rootPath, e); + forkJoinPool.shutdownNow(); + listener.onError(rootPath, new TimeoutException("扫描超时30秒")); + } catch (Exception e) { + listener.onError(rootPath, e); + } + } + + private void scanDirectory(Path dir, FileScanListener listener, + AtomicInteger processedFiles, AtomicLong totalFiles, int currentDepth) { + if (cancelled || currentDepth > maxDepth) return; + + try (DirectoryStream stream = Files.newDirectoryStream(dir)) { + for (Path path : stream) { + if (cancelled) break; + + if (Files.isDirectory(path)) { + scanDirectory(path, listener, processedFiles, totalFiles, currentDepth + 1); + } else if (Files.isRegularFile(path)) { + processFile(path, listener, processedFiles, totalFiles); + } + } + } catch (IOException e) { + listener.onError(dir, e); + } + } + private void processFile(Path file, FileScanListener listener, + AtomicInteger processedFiles, AtomicLong totalFiles) { + if (cancelled) return; + + try { + listener.onFileFound(file); + + // 进度更新处理 + if (listener instanceof ProgressAwareListener progressListener && totalFiles != null) { + int processed = processedFiles.incrementAndGet(); + long total = totalFiles.get(); + progressListener.onProgressUpdate(processed, (int)total); + } + } catch (Exception e) { + listener.onError(file, e); + } + } + + private void validateDirectory(Path path) throws IOException { + if (!Files.exists(path)) { + throw new FileNotFoundException(path.toString()); + } + if (!Files.isReadable(path)) { + throw new AccessDeniedException(path.toString()); + } + // 检查是否是挂载点 + if (Files.getFileStore(path).type().equals("NTFS") && + path.toString().contains("$")) { + throw new IOException("系统目录禁止访问: " + path); + } + } + public void cancel() { + cancelled = true; + forkJoinPool.shutdownNow(); + } + + @Override + public void close() { + cancel(); + } +} \ No newline at end of file diff --git a/src/main/java/top/r3944realms/docchecktoolrefactored/ui/SceneManager.java b/src/main/java/top/r3944realms/docchecktoolrefactored/ui/SceneManager.java index feca4a1..1d0a141 100644 --- a/src/main/java/top/r3944realms/docchecktoolrefactored/ui/SceneManager.java +++ b/src/main/java/top/r3944realms/docchecktoolrefactored/ui/SceneManager.java @@ -123,7 +123,7 @@ public class SceneManager { } /** - * Try get scene handler. + * Try to get scene handler. * * @param node the node * @param handler the handler diff --git a/src/main/java/top/r3944realms/docchecktoolrefactored/util/StringUtil.java b/src/main/java/top/r3944realms/docchecktoolrefactored/util/StringUtil.java new file mode 100644 index 0000000..c497fd1 --- /dev/null +++ b/src/main/java/top/r3944realms/docchecktoolrefactored/util/StringUtil.java @@ -0,0 +1,27 @@ +package top.r3944realms.docchecktoolrefactored.util; + +public class StringUtil { + public static String NO_BUG = """ + + _ooOoo_ + o8888888o + 88" . "88 + (| -_- |) + O\\ = /O + ____/`---'\\____ + .' \\\\| |// `. + / \\\\||| : |||// \\ + / _||||| -:- |||||- \\ + | | \\\\\\ - /// | | + | \\_| ''\\---/'' | | + \\ .-\\__ `-` ___/-. / + ___`. .' /--.--\\ `. . __ + ."" '< `.___\\_<|>_/___.' >'"". + | | : `- \\`.;`\\ _ /`;.`/ - ` : | | + \\ \\ `-. \\_ __\\ /__ _/ .-` / / + ======`-.____`-.___\\_____/___.-`____.-'====== + `=---=' + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + 佛祖保佑 永无BUG + """; +} diff --git a/src/test/java/module-info.java b/src/test/java/module-info.java new file mode 100644 index 0000000..de96eab --- /dev/null +++ b/src/test/java/module-info.java @@ -0,0 +1,9 @@ +module top.r3944realms.docchecktoolrefactored.test { + requires static lombok; + requires org.slf4j; + requires top.r3944realms.docchecktoolrefactored; + requires org.junit.jupiter.api; + + exports top.r3944realms.docchecktoolrefactored.test; + opens top.r3944realms.docchecktoolrefactored.test; +} \ No newline at end of file diff --git a/src/test/java/top/r3944realms/docchecktoolrefactored/test/DuplicateFinderPerformanceTest.java b/src/test/java/top/r3944realms/docchecktoolrefactored/test/DuplicateFinderPerformanceTest.java new file mode 100644 index 0000000..e139cb3 --- /dev/null +++ b/src/test/java/top/r3944realms/docchecktoolrefactored/test/DuplicateFinderPerformanceTest.java @@ -0,0 +1,67 @@ +package top.r3944realms.docchecktoolrefactored.test; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.RepeatedTest; +import top.r3944realms.docchecktoolrefactored.core.DuplicateFinder; +import top.r3944realms.docchecktoolrefactored.core.FileHashCalculator; +import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner; +import top.r3944realms.docchecktoolrefactored.io.scanner.RobustParallelScanner; + +import java.io.IOException; +import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class DuplicateFinderPerformanceTest { + + private static final String TEST_PATH = "D:/测试数据/JPG"; + private FileScanner scanner; + private FileHashCalculator hashCalculator; + + @BeforeEach + void setUp() { + scanner = new RobustParallelScanner(20); + hashCalculator = FileHashCalculator.defaultInstance(); + } + + @RepeatedTest(5) + void compareFinderPerformance() throws IOException { + // Test finder WITHOUT pre-counting + long startWithoutPrecount = System.nanoTime(); + DuplicateFinder finderWithoutPrecount = new DuplicateFinder(scanner, hashCalculator, false); + finderWithoutPrecount.findDuplicates(Paths.get(TEST_PATH)); + long durationWithoutPrecount = System.nanoTime() - startWithoutPrecount; + + // Test finder WITH pre-counting + long startWithPrecount = System.nanoTime(); + DuplicateFinder finderWithPrecount = new DuplicateFinder(scanner, hashCalculator, true); + finderWithPrecount.findDuplicates(Paths.get(TEST_PATH)); + long durationWithPrecount = System.nanoTime() - startWithPrecount; + + // Convert to milliseconds + long msWithout = TimeUnit.NANOSECONDS.toMillis(durationWithoutPrecount); + long msWith = TimeUnit.NANOSECONDS.toMillis(durationWithPrecount); + + System.out.println("Without pre-counting: " + msWithout + " ms"); + System.out.println("With pre-counting: " + msWith + " ms"); + +// // Assert that pre-counting provides benefit +// assertTrue(msWith < msWithout * 1.2, +// "Pre-counting version should not be more than 20% slower"); + } + + @Test + void verifySameResults() throws IOException { + DuplicateFinder finder1 = new DuplicateFinder(scanner, hashCalculator, false); + var result1 = finder1.findDuplicates(Paths.get(TEST_PATH)); + + DuplicateFinder finder2 = new DuplicateFinder(scanner, hashCalculator, true); + var result2 = finder2.findDuplicates(Paths.get(TEST_PATH)); + +// // Verify both methods find the same duplicates +// assertTrue(result1.containsAll(result2) && result2.containsAll(result1), +// "Both methods should find the same duplicate files"); + } +} \ No newline at end of file diff --git a/src/test/java/top/r3944realms/docchecktoolrefactored/test/DuplicateTest.java b/src/test/java/top/r3944realms/docchecktoolrefactored/test/DuplicateTest.java new file mode 100644 index 0000000..86e8cd2 --- /dev/null +++ b/src/test/java/top/r3944realms/docchecktoolrefactored/test/DuplicateTest.java @@ -0,0 +1,36 @@ +package top.r3944realms.docchecktoolrefactored.test; + +import lombok.extern.slf4j.Slf4j; +import top.r3944realms.docchecktoolrefactored.core.DuplicateFinder; +import top.r3944realms.docchecktoolrefactored.core.FileHashCalculator; +import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner; +import top.r3944realms.docchecktoolrefactored.io.scanner.RobustParallelScanner; +import top.r3944realms.docchecktoolrefactored.model.DuplicateGroup; + +import java.io.IOException; +import java.nio.file.Paths; +import java.util.List; + +@Slf4j +public class DuplicateTest { + public static void main(String[] args) throws IOException { + // 创建组件 + FileScanner scanner1 = new RobustParallelScanner(20); + FileHashCalculator hashCalculator1 = FileHashCalculator.defaultInstance(); + FileScanner scanner2 = new RobustParallelScanner(20); + FileHashCalculator hashCalculator2 = FileHashCalculator.defaultInstance(); + + // 执行查重 + DuplicateFinder finder = new DuplicateFinder(scanner1, hashCalculator1); + DuplicateFinder finder2 = new DuplicateFinder(scanner2, hashCalculator2, true); + List duplicates = finder.findDuplicates(Paths.get("H:\\nw0\\newworld(1)(1)")); + + // 处理结果 + duplicates.forEach(group -> { + log.info("发现重复文件组({} bytes):", group.size()); + group.fileMetas().forEach(file -> + log.info(" {}", file.getPath()) + ); + }); + } +} diff --git a/src/test/java/top/r3944realms/docchecktoolrefactored/test/ParallelFileScannerTest.java b/src/test/java/top/r3944realms/docchecktoolrefactored/test/ParallelFileScannerTest.java new file mode 100644 index 0000000..dde4fe8 --- /dev/null +++ b/src/test/java/top/r3944realms/docchecktoolrefactored/test/ParallelFileScannerTest.java @@ -0,0 +1,32 @@ +package top.r3944realms.docchecktoolrefactored.test; + +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +@Slf4j +public class ParallelFileScannerTest { + public static void main(String[] args) throws IOException { + test1(Path.of("D:/测试数据/JPG")); + } + private static void test1(Path rootPath) throws IOException { + @SuppressWarnings("resource") List files = Files.walk(rootPath) + .peek(p -> log.trace("visiting: {}", p)) + .parallel() + .filter(p -> { + boolean isRegular = Files.isRegularFile(p); + if (!isRegular) { + log.debug("Skip non-regular : {} ", p); + } + return isRegular; + }) + .peek(p -> log.trace("Found file: {}", p)) + .toList(); // 立即消费Stream + if (files.isEmpty()) { + log.warn("No files found in directory: {}", rootPath); + } else log.debug("Found {} files in {}", files.size(), rootPath); + } +}