feat:文件扫描功能、hash码计算与重复文件扫描的类实现
This commit is contained in:
parent
e764df736a
commit
5e2fbfe1f9
11
build.gradle
11
build.gradle
|
|
@ -1,3 +1,9 @@
|
|||
buildscript {
|
||||
repositories {
|
||||
google()
|
||||
mavenCentral()
|
||||
}
|
||||
}
|
||||
plugins {
|
||||
id 'java'
|
||||
id 'io.franzbecker.gradle-lombok' version '3.0.0'
|
||||
|
|
@ -56,12 +62,17 @@ dependencies {
|
|||
testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:${junitVersion}")
|
||||
}
|
||||
|
||||
|
||||
test {
|
||||
useJUnitPlatform()
|
||||
configurations.configureEach {
|
||||
exclude group: 'org.apache.logging.log4j', module: 'log4j-slf4j-impl'
|
||||
}
|
||||
}
|
||||
// 可选:添加 testJar 任务
|
||||
tasks.register('testJar', Jar) {
|
||||
from sourceSets.test.output
|
||||
}
|
||||
|
||||
tasks.register('createLogDir') {
|
||||
doLast {
|
||||
|
|
|
|||
|
|
@ -8,11 +8,15 @@ module top.r3944realms.docchecktoolrefactored {
|
|||
opens top.r3944realms.docchecktoolrefactored to javafx.fxml;
|
||||
opens top.r3944realms.docchecktoolrefactored.ui to javafx.fxml;
|
||||
opens top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml;
|
||||
opens top.r3944realms.docchecktoolrefactored.deprecated to javafx.fxml;
|
||||
|
||||
exports top.r3944realms.docchecktoolrefactored to javafx.graphics;
|
||||
exports top.r3944realms.docchecktoolrefactored.ui to javafx.fxml;
|
||||
exports top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml;
|
||||
exports top.r3944realms.docchecktoolrefactored.deprecated to javafx.graphics;
|
||||
opens top.r3944realms.docchecktoolrefactored.deprecated to javafx.fxml;
|
||||
|
||||
exports top.r3944realms.docchecktoolrefactored.core ;
|
||||
exports top.r3944realms.docchecktoolrefactored.io.scanner;
|
||||
exports top.r3944realms.docchecktoolrefactored.io.reader;
|
||||
exports top.r3944realms.docchecktoolrefactored.model;
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ import javafx.application.Application;
|
|||
import javafx.stage.Stage;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import top.r3944realms.docchecktoolrefactored.ui.SceneManager;
|
||||
import top.r3944realms.docchecktoolrefactored.util.StringUtil;
|
||||
|
||||
/**
|
||||
* The type Main.
|
||||
|
|
@ -28,7 +29,7 @@ public class Main extends Application {
|
|||
* @param args the input arguments
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
log.info("Hello World!");
|
||||
log.info(StringUtil.NO_BUG);
|
||||
launch(args);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
package top.r3944realms.docchecktoolrefactored.core;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner;
|
||||
import top.r3944realms.docchecktoolrefactored.model.DuplicateGroup;
|
||||
import top.r3944realms.docchecktoolrefactored.model.FileMetadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
/**
|
||||
* 重复文件查找核心类
|
||||
*/
|
||||
@Slf4j
|
||||
public class DuplicateFinder {
|
||||
private final FileScanner fileScanner;
|
||||
private final FileHashCalculator hashCalculator;
|
||||
private final boolean enableProgress;
|
||||
public DuplicateFinder(FileScanner fileScanner, FileHashCalculator hashCalculator, boolean enableProgress) {
|
||||
this.fileScanner = Objects.requireNonNull(fileScanner);
|
||||
this.hashCalculator = Objects.requireNonNull(hashCalculator);
|
||||
this.enableProgress = enableProgress;
|
||||
}
|
||||
public DuplicateFinder(FileScanner fileScanner, FileHashCalculator hashCalculator) {
|
||||
this(fileScanner, hashCalculator, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* 查找重复文件
|
||||
* @param rootDir 要扫描的根目录
|
||||
* @return 按哈希值分组的重复文件列表
|
||||
*/
|
||||
public List<DuplicateGroup> findDuplicates(Path rootDir) throws IOException {
|
||||
// 第一阶段:按文件大小分组
|
||||
Map<Long, List<FileMetadata>> sizeGroups = groupFilesBySize(rootDir);
|
||||
|
||||
// 第二阶段:对可能重复的文件计算哈希
|
||||
Map<String, List<FileMetadata>> hashGroups = new ConcurrentHashMap<>();
|
||||
|
||||
sizeGroups.values().parallelStream()
|
||||
.filter(group -> group.size() > 1) // 只处理可能重复的文件
|
||||
.forEach(group -> group.parallelStream().forEach(file -> {
|
||||
try {
|
||||
String hash = hashCalculator.calculateHash(file.getPath());
|
||||
file.setHash(hash);
|
||||
hashGroups.computeIfAbsent(hash, k -> new ArrayList<>()).add(file);
|
||||
} catch (IOException e) {
|
||||
// 记录错误但继续处理其他文件
|
||||
log.error("Failed to calculate file's hash: {}, {}", file.getPath(), e.getMessage());
|
||||
}
|
||||
}));
|
||||
|
||||
// 第三阶段:构建结果
|
||||
return hashGroups.values().stream()
|
||||
.filter(group -> group.size() > 1)
|
||||
.map(group -> new DuplicateGroup(
|
||||
group.get(0).getHash(),
|
||||
group.get(0).getSize(),
|
||||
group
|
||||
))
|
||||
.sorted(Comparator.comparingLong(DuplicateGroup::size).reversed())
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* 按文件大小分组
|
||||
*/
|
||||
private Map<Long, List<FileMetadata>> groupFilesBySize(Path rootDir) throws IOException {
|
||||
Map<Long, List<FileMetadata>> sizeGroups = new ConcurrentHashMap<>();
|
||||
FileScanner.ProgressAwareListener listener = new FileScanner.ProgressAwareListener() {
|
||||
@Override
|
||||
public void onProgressUpdate(int current, int total) {
|
||||
log.info("Scanning progress: {}/{} ", current, total);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFileFound(Path file) {
|
||||
try {
|
||||
FileMetadata meta = new FileMetadata();
|
||||
meta.setPath(file);
|
||||
meta.setSize(Files.size(file));
|
||||
sizeGroups.computeIfAbsent(meta.getSize(), k -> new ArrayList<>()).add(meta);
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to get file's size: {}", file);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public void onScanComplete() {}
|
||||
@Override public void onError(Path file, Exception e) {
|
||||
log.error("Error on scanning file: {}, {}", file, e.getMessage());
|
||||
}
|
||||
};
|
||||
if(enableProgress)
|
||||
fileScanner.scanWithProgress(rootDir, listener);
|
||||
else
|
||||
fileScanner.scan(rootDir, listener);
|
||||
return sizeGroups;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
package top.r3944realms.docchecktoolrefactored.core;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
 * Strategy interface for computing file hashes.
 */
public interface FileHashCalculator {
    /**
     * Computes the hash of a file.
     *
     * @param file path of the file to hash
     * @return the file's hash as a string
     * @throws IOException if the file cannot be read
     */
    String calculateHash(Path file) throws IOException;

    /**
     * Returns the default implementation, which uses MD5.
     *
     * @return a new MD5-backed calculator
     */
    static FileHashCalculator defaultInstance() {
        return new MD5HashCalculator();
    }
}
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
package top.r3944realms.docchecktoolrefactored.core;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
|
||||
/**
|
||||
* MD5哈希计算实现
|
||||
*/
|
||||
public class MD5HashCalculator implements FileHashCalculator {
|
||||
private static final int BUFFER_SIZE = 8192;
|
||||
|
||||
@Override
|
||||
public String calculateHash(Path file) throws IOException {
|
||||
try {
|
||||
MessageDigest md = MessageDigest.getInstance("MD5");
|
||||
try (var is = Files.newInputStream(file)) {
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int bytesRead;
|
||||
while ((bytesRead = is.read(buffer)) != -1) {
|
||||
md.update(buffer, 0, bytesRead);
|
||||
}
|
||||
}
|
||||
return bytesToHex(md.digest());
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new RuntimeException("MD5算法不可用", e);
|
||||
}
|
||||
}
|
||||
|
||||
private static String bytesToHex(byte[] bytes) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (byte b : bytes) {
|
||||
sb.append(String.format("%02x", b));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -12,7 +12,15 @@ public interface FileScanner {
|
|||
* @param rootPath 根路径
|
||||
* @param listener 文件发现监听器
|
||||
*/
|
||||
void scan(Path rootPath, FileScanListener listener);
|
||||
default void scan(Path rootPath, FileScanListener listener) {
|
||||
throw new UnsupportedOperationException("Please implement FileScanner, FileScannerListener.");
|
||||
}
|
||||
/**
|
||||
* 扫描指定路径下的文件(带进度反馈)
|
||||
*/
|
||||
default void scanWithProgress(Path rootPath, ProgressAwareListener listener) {
|
||||
throw new UnsupportedOperationException("Please implement FileScanner, ProgressAwareListener.");
|
||||
}
|
||||
|
||||
/**
|
||||
* 文件扫描监听器
|
||||
|
|
@ -38,4 +46,14 @@ public interface FileScanner {
|
|||
*/
|
||||
void onError(Path file, Exception e);
|
||||
}
|
||||
|
||||
|
||||
interface ProgressAwareListener extends FileScanListener {
|
||||
/**
|
||||
* 进度更新回调
|
||||
* @param current 当前已处理文件数
|
||||
* @param total 预估总文件数(可能动态增长)
|
||||
*/
|
||||
void onProgressUpdate(int current, int total);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,35 @@
|
|||
package top.r3944realms.docchecktoolrefactored.io.scanner;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* The type Parallel file scanner.
|
||||
* <p>
|
||||
* 这个没法正常使用,目前遇到的问题
|
||||
* <p>
|
||||
* * 目录遍历时遇到权限问题(静默失败)
|
||||
* <p>
|
||||
* * 存在符号链接循环
|
||||
* <p>
|
||||
* * 文件系统驱动程序卡死
|
||||
* <p>
|
||||
* * JVM与NTFS文件系统兼容性问题
|
||||
*/
|
||||
public class ParallelFileScanner implements FileScanner,AutoCloseable {
|
||||
@Slf4j
|
||||
@Deprecated
|
||||
public class ParallelFileScanner implements FileScanner ,AutoCloseable {
|
||||
private final ForkJoinPool forkJoinPool;
|
||||
|
||||
private volatile boolean cancelled = false;
|
||||
/**
|
||||
* 使用默认并行度(CPU核心数)
|
||||
*/
|
||||
|
|
@ -31,38 +48,90 @@ public class ParallelFileScanner implements FileScanner,AutoCloseable {
|
|||
|
||||
@Override
|
||||
public void scan(Path rootPath, FileScanListener listener) {
|
||||
forkJoinPool.submit(() -> {
|
||||
try (
|
||||
Stream<Path> pathStream = Files.walk(rootPath)
|
||||
.parallel() // 使用ForkJoinPool的并行流
|
||||
.filter(Files::isRegularFile)
|
||||
){
|
||||
pathStream.forEach(file -> {
|
||||
try {
|
||||
listener.onFileFound(file);
|
||||
} catch (Exception e) {
|
||||
listener.onError(file, e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
).join();
|
||||
scanInternal(rootPath, listener, null);
|
||||
}
|
||||
@Override
|
||||
public void scanWithProgress(Path rootPath, ProgressAwareListener listener) {
|
||||
// 先快速统计总文件数
|
||||
long totalFiles = countFiles(rootPath);
|
||||
scanInternal(rootPath, listener, totalFiles);
|
||||
}
|
||||
|
||||
listener.onScanComplete();
|
||||
private long countFiles(Path rootPath) {
|
||||
try(Stream<Path> pathStream = Files.walk(rootPath)
|
||||
.parallel()
|
||||
.filter(Files::isRegularFile)) {
|
||||
return pathStream.count();
|
||||
} catch (IOException e) {
|
||||
return -1; // 表示无法确定总数
|
||||
}
|
||||
}
|
||||
private void scanInternal(Path rootPath, FileScanListener listener, Long totalFiles) {
|
||||
log.debug("ThreadPool Status: {}", forkJoinPool.isShutdown() ? "Closed" : "Running");
|
||||
forkJoinPool.submit(() -> { // 方法没问题,可能就是在线程这里被卡死了
|
||||
try {
|
||||
AtomicInteger processed = new AtomicInteger(0);
|
||||
log.debug("Scanning files in {}", rootPath);
|
||||
// 收集所有文件到List(避免Stream被重复使用)
|
||||
@SuppressWarnings("resource") List<Path> files = Files.walk(rootPath)
|
||||
.peek(p -> log.trace("visiting: {}", p))
|
||||
.parallel()
|
||||
.filter(p -> {
|
||||
boolean isRegular = Files.isRegularFile(p);
|
||||
if (!isRegular) {
|
||||
log.debug("Skip non-regular : {} ", p);
|
||||
}
|
||||
return isRegular;
|
||||
})
|
||||
.peek(p -> log.trace("Found file: {}", p))
|
||||
.toList(); // 立即消费Stream
|
||||
if (files.isEmpty()) {
|
||||
log.warn("No files found in directory: {}", rootPath);
|
||||
} else log.debug("Found {} files in {}", files.size(), rootPath);
|
||||
files.forEach(file -> {
|
||||
if (cancelled) {
|
||||
log.debug("Cancelled scanning file {}", file);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
log.debug("Handle file {}", file);
|
||||
listener.onFileFound(file);
|
||||
|
||||
// 进度更新
|
||||
if (listener instanceof ProgressAwareListener progressListener) {
|
||||
int current = processed.incrementAndGet();
|
||||
progressListener.onProgressUpdate(
|
||||
current,
|
||||
totalFiles != null ? totalFiles.intValue() : -1
|
||||
);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug("Error Handle file {}", file, e);
|
||||
listener.onError(file, e);
|
||||
}
|
||||
});
|
||||
if (!cancelled) {
|
||||
log.debug("Finished scanning files in {}", rootPath);
|
||||
listener.onScanComplete();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
listener.onError(rootPath, e);
|
||||
} catch (Exception e) {
|
||||
log.error("Unexpected error in scan thread", e);
|
||||
listener.onError(rootPath, e);
|
||||
}
|
||||
});
|
||||
log.debug("Task submitted to thread pool");
|
||||
}
|
||||
|
||||
public void cancel() {
|
||||
cancelled = true;
|
||||
forkJoinPool.shutdownNow();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
forkJoinPool.shutdown();
|
||||
try {
|
||||
if (!forkJoinPool.awaitTermination(1, TimeUnit.SECONDS)) {
|
||||
forkJoinPool.shutdownNow();
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
forkJoinPool.shutdownNow();
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
cancel();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
package top.r3944realms.docchecktoolrefactored.io.scanner;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.AccessDeniedException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
@Slf4j
|
||||
public class RobustParallelScanner implements FileScanner, AutoCloseable {
|
||||
private final ForkJoinPool forkJoinPool;
|
||||
private volatile boolean cancelled = false;
|
||||
private final int maxDepth;
|
||||
public RobustParallelScanner(int maxDepth) {
|
||||
this(Runtime.getRuntime().availableProcessors(), maxDepth);
|
||||
}
|
||||
public RobustParallelScanner(int parallelism, int maxDepth) {
|
||||
this.forkJoinPool = new ForkJoinPool(parallelism);
|
||||
this.maxDepth = maxDepth; // 防止无限递归
|
||||
}
|
||||
|
||||
@Override
|
||||
public void scan(Path rootPath, FileScanListener listener) {
|
||||
scanInternal(rootPath, listener, null);
|
||||
}
|
||||
@Override
|
||||
public void scanWithProgress(Path rootPath, ProgressAwareListener listener) {
|
||||
// 预扫描阶段:计算总文件数
|
||||
AtomicLong totalFiles = new AtomicLong(0);
|
||||
countFiles(rootPath, totalFiles);
|
||||
scanInternal(rootPath, listener, totalFiles);
|
||||
}
|
||||
private void countFiles(Path dir, AtomicLong counter) {
|
||||
if (cancelled) return;
|
||||
|
||||
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
|
||||
for (Path path : stream) {
|
||||
if (cancelled) return;
|
||||
|
||||
if (Files.isDirectory(path)) {
|
||||
countFiles(path, counter);
|
||||
} else if (Files.isRegularFile(path)) {
|
||||
counter.incrementAndGet();
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.warn("Failed to pre-scan: {}", dir, e);
|
||||
}
|
||||
}
|
||||
private void scanInternal(Path rootPath, FileScanListener listener, AtomicLong totalFiles) {
|
||||
try {
|
||||
validateDirectory(rootPath);
|
||||
|
||||
forkJoinPool.submit(() -> {
|
||||
try {
|
||||
AtomicInteger processedFiles = new AtomicInteger(0);
|
||||
scanDirectory(rootPath, listener, processedFiles, totalFiles, 0);
|
||||
|
||||
if (!cancelled) {
|
||||
listener.onScanComplete();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
listener.onError(rootPath, e);
|
||||
}
|
||||
}).get(30, TimeUnit.SECONDS);
|
||||
} catch (TimeoutException e) {
|
||||
log.error("Scan timeout: {}", rootPath, e);
|
||||
forkJoinPool.shutdownNow();
|
||||
listener.onError(rootPath, new TimeoutException("扫描超时30秒"));
|
||||
} catch (Exception e) {
|
||||
listener.onError(rootPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
private void scanDirectory(Path dir, FileScanListener listener,
|
||||
AtomicInteger processedFiles, AtomicLong totalFiles, int currentDepth) {
|
||||
if (cancelled || currentDepth > maxDepth) return;
|
||||
|
||||
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
|
||||
for (Path path : stream) {
|
||||
if (cancelled) break;
|
||||
|
||||
if (Files.isDirectory(path)) {
|
||||
scanDirectory(path, listener, processedFiles, totalFiles, currentDepth + 1);
|
||||
} else if (Files.isRegularFile(path)) {
|
||||
processFile(path, listener, processedFiles, totalFiles);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
listener.onError(dir, e);
|
||||
}
|
||||
}
|
||||
private void processFile(Path file, FileScanListener listener,
|
||||
AtomicInteger processedFiles, AtomicLong totalFiles) {
|
||||
if (cancelled) return;
|
||||
|
||||
try {
|
||||
listener.onFileFound(file);
|
||||
|
||||
// 进度更新处理
|
||||
if (listener instanceof ProgressAwareListener progressListener && totalFiles != null) {
|
||||
int processed = processedFiles.incrementAndGet();
|
||||
long total = totalFiles.get();
|
||||
progressListener.onProgressUpdate(processed, (int)total);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
listener.onError(file, e);
|
||||
}
|
||||
}
|
||||
|
||||
private void validateDirectory(Path path) throws IOException {
|
||||
if (!Files.exists(path)) {
|
||||
throw new FileNotFoundException(path.toString());
|
||||
}
|
||||
if (!Files.isReadable(path)) {
|
||||
throw new AccessDeniedException(path.toString());
|
||||
}
|
||||
// 检查是否是挂载点
|
||||
if (Files.getFileStore(path).type().equals("NTFS") &&
|
||||
path.toString().contains("$")) {
|
||||
throw new IOException("系统目录禁止访问: " + path);
|
||||
}
|
||||
}
|
||||
public void cancel() {
|
||||
cancelled = true;
|
||||
forkJoinPool.shutdownNow();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
cancel();
|
||||
}
|
||||
}
|
||||
|
|
@ -123,7 +123,7 @@ public class SceneManager {
|
|||
}
|
||||
|
||||
/**
|
||||
* Try get scene handler.
|
||||
* Try to get scene handler.
|
||||
*
|
||||
* @param node the node
|
||||
* @param handler the handler
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
package top.r3944realms.docchecktoolrefactored.util;
|
||||
|
||||
/**
 * Shared string constants.
 */
public final class StringUtil {
    // Utility class: no instances.
    private StringUtil() {
    }

    /**
     * ASCII-art "Buddha bless, no bugs" banner logged at application startup.
     * BUGFIX: was a mutable {@code public static} field; constants must be
     * {@code final}.
     * NOTE(review): the banner's leading-space alignment appears to have been
     * lost in transit — verify the art renders as originally intended.
     */
    public static final String NO_BUG = """

            _ooOoo_
            o8888888o
            88" . "88
            (| -_- |)
            O\\ = /O
            ____/`---'\\____
            .' \\\\| |// `.
            / \\\\||| : |||// \\
            / _||||| -:- |||||- \\
            | | \\\\\\ - /// | |
            | \\_| ''\\---/'' | |
            \\ .-\\__ `-` ___/-. /
            ___`. .' /--.--\\ `. . __
            ."" '< `.___\\_<|>_/___.' >'"".
            | | : `- \\`.;`\\ _ /`;.`/ - ` : | |
            \\ \\ `-. \\_ __\\ /__ _/ .-` / /
            ======`-.____`-.___\\_____/___.-`____.-'======
            `=---='
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            佛祖保佑 永无BUG
            """;
}
|
||||
9
src/test/java/module-info.java
Normal file
9
src/test/java/module-info.java
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
// Test module: companion to the main application module for JUnit-based tests.
module top.r3944realms.docchecktoolrefactored.test {
    requires static lombok; // compile-time only (annotation processing)
    requires org.slf4j;
    requires top.r3944realms.docchecktoolrefactored; // module under test
    requires org.junit.jupiter.api;

    exports top.r3944realms.docchecktoolrefactored.test;
    // Opened for reflective access (e.g. by the JUnit platform launcher).
    opens top.r3944realms.docchecktoolrefactored.test;
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
package top.r3944realms.docchecktoolrefactored.test;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.RepeatedTest;
|
||||
import top.r3944realms.docchecktoolrefactored.core.DuplicateFinder;
|
||||
import top.r3944realms.docchecktoolrefactored.core.FileHashCalculator;
|
||||
import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner;
|
||||
import top.r3944realms.docchecktoolrefactored.io.scanner.RobustParallelScanner;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
 * Compares {@link DuplicateFinder} wall-clock time with and without the
 * pre-counting (progress) scan phase, and checks both modes agree.
 * <p>
 * NOTE(review): TEST_PATH is a machine-specific absolute path — this test only
 * runs meaningfully on the author's machine; consider a generated temp tree.
 */
public class DuplicateFinderPerformanceTest {

    // Fixed local dataset used for both timing runs — TODO make configurable.
    private static final String TEST_PATH = "D:/测试数据/JPG";
    private FileScanner scanner;
    private FileHashCalculator hashCalculator;

    @BeforeEach
    void setUp() {
        scanner = new RobustParallelScanner(20);
        hashCalculator = FileHashCalculator.defaultInstance();
    }

    @RepeatedTest(5)
    void compareFinderPerformance() throws IOException {
        // Test finder WITHOUT pre-counting
        long startWithoutPrecount = System.nanoTime();
        DuplicateFinder finderWithoutPrecount = new DuplicateFinder(scanner, hashCalculator, false);
        finderWithoutPrecount.findDuplicates(Paths.get(TEST_PATH));
        long durationWithoutPrecount = System.nanoTime() - startWithoutPrecount;

        // Test finder WITH pre-counting
        // NOTE(review): both measurements share one scanner instance and run
        // sequentially — the first run warms the OS file cache, which biases
        // this comparison in favor of the second run; confirm acceptable.
        long startWithPrecount = System.nanoTime();
        DuplicateFinder finderWithPrecount = new DuplicateFinder(scanner, hashCalculator, true);
        finderWithPrecount.findDuplicates(Paths.get(TEST_PATH));
        long durationWithPrecount = System.nanoTime() - startWithPrecount;

        // Convert to milliseconds
        long msWithout = TimeUnit.NANOSECONDS.toMillis(durationWithoutPrecount);
        long msWith = TimeUnit.NANOSECONDS.toMillis(durationWithPrecount);

        System.out.println("Without pre-counting: " + msWithout + " ms");
        System.out.println("With pre-counting: " + msWith + " ms");

        // Assertion left disabled: timing on a live filesystem is too noisy.
        // // Assert that pre-counting provides benefit
        // assertTrue(msWith < msWithout * 1.2,
        //         "Pre-counting version should not be more than 20% slower");
    }

    @Test
    void verifySameResults() throws IOException {
        DuplicateFinder finder1 = new DuplicateFinder(scanner, hashCalculator, false);
        var result1 = finder1.findDuplicates(Paths.get(TEST_PATH));

        DuplicateFinder finder2 = new DuplicateFinder(scanner, hashCalculator, true);
        var result2 = finder2.findDuplicates(Paths.get(TEST_PATH));

        // Equality assertion left disabled — presumably DuplicateGroup needs a
        // suitable equals() first; TODO confirm and re-enable.
        // // Verify both methods find the same duplicate files
        // assertTrue(result1.containsAll(result2) && result2.containsAll(result1),
        //         "Both methods should find the same duplicate files");
    }
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
package top.r3944realms.docchecktoolrefactored.test;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import top.r3944realms.docchecktoolrefactored.core.DuplicateFinder;
|
||||
import top.r3944realms.docchecktoolrefactored.core.FileHashCalculator;
|
||||
import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner;
|
||||
import top.r3944realms.docchecktoolrefactored.io.scanner.RobustParallelScanner;
|
||||
import top.r3944realms.docchecktoolrefactored.model.DuplicateGroup;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
|
||||
public class DuplicateTest {
|
||||
public static void main(String[] args) throws IOException {
|
||||
// 创建组件
|
||||
FileScanner scanner1 = new RobustParallelScanner(20);
|
||||
FileHashCalculator hashCalculator1 = FileHashCalculator.defaultInstance();
|
||||
FileScanner scanner2 = new RobustParallelScanner(20);
|
||||
FileHashCalculator hashCalculator2 = FileHashCalculator.defaultInstance();
|
||||
|
||||
// 执行查重
|
||||
DuplicateFinder finder = new DuplicateFinder(scanner1, hashCalculator1);
|
||||
DuplicateFinder finder2 = new DuplicateFinder(scanner2, hashCalculator2, true);
|
||||
List<DuplicateGroup> duplicates = finder.findDuplicates(Paths.get("H:\\nw0\\newworld(1)(1)"));
|
||||
|
||||
// 处理结果
|
||||
duplicates.forEach(group -> {
|
||||
log.info("发现重复文件组({} bytes):", group.size());
|
||||
group.fileMetas().forEach(file ->
|
||||
log.info(" {}", file.getPath())
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
package top.r3944realms.docchecktoolrefactored.test;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
|
||||
public class ParallelFileScannerTest {
|
||||
public static void main(String[] args) throws IOException {
|
||||
test1(Path.of("D:/测试数据/JPG"));
|
||||
}
|
||||
private static void test1(Path rootPath) throws IOException {
|
||||
@SuppressWarnings("resource") List<Path> files = Files.walk(rootPath)
|
||||
.peek(p -> log.trace("visiting: {}", p))
|
||||
.parallel()
|
||||
.filter(p -> {
|
||||
boolean isRegular = Files.isRegularFile(p);
|
||||
if (!isRegular) {
|
||||
log.debug("Skip non-regular : {} ", p);
|
||||
}
|
||||
return isRegular;
|
||||
})
|
||||
.peek(p -> log.trace("Found file: {}", p))
|
||||
.toList(); // 立即消费Stream
|
||||
if (files.isEmpty()) {
|
||||
log.warn("No files found in directory: {}", rootPath);
|
||||
} else log.debug("Found {} files in {}", files.size(), rootPath);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user