feat:文件扫描功能、hash码计算与重复文件扫描的类实现
This commit is contained in:
parent
e764df736a
commit
5e2fbfe1f9
11
build.gradle
11
build.gradle
|
|
@ -1,3 +1,9 @@
|
||||||
|
buildscript {
|
||||||
|
repositories {
|
||||||
|
google()
|
||||||
|
mavenCentral()
|
||||||
|
}
|
||||||
|
}
|
||||||
plugins {
|
plugins {
|
||||||
id 'java'
|
id 'java'
|
||||||
id 'io.franzbecker.gradle-lombok' version '3.0.0'
|
id 'io.franzbecker.gradle-lombok' version '3.0.0'
|
||||||
|
|
@ -56,12 +62,17 @@ dependencies {
|
||||||
testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:${junitVersion}")
|
testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:${junitVersion}")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
test {
|
test {
|
||||||
useJUnitPlatform()
|
useJUnitPlatform()
|
||||||
configurations.configureEach {
|
configurations.configureEach {
|
||||||
exclude group: 'org.apache.logging.log4j', module: 'log4j-slf4j-impl'
|
exclude group: 'org.apache.logging.log4j', module: 'log4j-slf4j-impl'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// 可选:添加 testJar 任务
|
||||||
|
tasks.register('testJar', Jar) {
|
||||||
|
from sourceSets.test.output
|
||||||
|
}
|
||||||
|
|
||||||
tasks.register('createLogDir') {
|
tasks.register('createLogDir') {
|
||||||
doLast {
|
doLast {
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,15 @@ module top.r3944realms.docchecktoolrefactored {
|
||||||
opens top.r3944realms.docchecktoolrefactored to javafx.fxml;
|
opens top.r3944realms.docchecktoolrefactored to javafx.fxml;
|
||||||
opens top.r3944realms.docchecktoolrefactored.ui to javafx.fxml;
|
opens top.r3944realms.docchecktoolrefactored.ui to javafx.fxml;
|
||||||
opens top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml;
|
opens top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml;
|
||||||
|
opens top.r3944realms.docchecktoolrefactored.deprecated to javafx.fxml;
|
||||||
|
|
||||||
exports top.r3944realms.docchecktoolrefactored to javafx.graphics;
|
exports top.r3944realms.docchecktoolrefactored to javafx.graphics;
|
||||||
exports top.r3944realms.docchecktoolrefactored.ui to javafx.fxml;
|
exports top.r3944realms.docchecktoolrefactored.ui to javafx.fxml;
|
||||||
exports top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml;
|
exports top.r3944realms.docchecktoolrefactored.ui.module to javafx.fxml;
|
||||||
exports top.r3944realms.docchecktoolrefactored.deprecated to javafx.graphics;
|
exports top.r3944realms.docchecktoolrefactored.deprecated to javafx.graphics;
|
||||||
opens top.r3944realms.docchecktoolrefactored.deprecated to javafx.fxml;
|
|
||||||
|
|
||||||
|
exports top.r3944realms.docchecktoolrefactored.core ;
|
||||||
|
exports top.r3944realms.docchecktoolrefactored.io.scanner;
|
||||||
|
exports top.r3944realms.docchecktoolrefactored.io.reader;
|
||||||
|
exports top.r3944realms.docchecktoolrefactored.model;
|
||||||
}
|
}
|
||||||
|
|
@ -4,6 +4,7 @@ import javafx.application.Application;
|
||||||
import javafx.stage.Stage;
|
import javafx.stage.Stage;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import top.r3944realms.docchecktoolrefactored.ui.SceneManager;
|
import top.r3944realms.docchecktoolrefactored.ui.SceneManager;
|
||||||
|
import top.r3944realms.docchecktoolrefactored.util.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The type Main.
|
* The type Main.
|
||||||
|
|
@ -28,7 +29,7 @@ public class Main extends Application {
|
||||||
* @param args the input arguments
|
* @param args the input arguments
|
||||||
*/
|
*/
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
log.info("Hello World!");
|
log.info(StringUtil.NO_BUG);
|
||||||
launch(args);
|
launch(args);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
package top.r3944realms.docchecktoolrefactored.core;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner;
|
||||||
|
import top.r3944realms.docchecktoolrefactored.model.DuplicateGroup;
|
||||||
|
import top.r3944realms.docchecktoolrefactored.model.FileMetadata;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 重复文件查找核心类
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
public class DuplicateFinder {
|
||||||
|
private final FileScanner fileScanner;
|
||||||
|
private final FileHashCalculator hashCalculator;
|
||||||
|
private final boolean enableProgress;
|
||||||
|
public DuplicateFinder(FileScanner fileScanner, FileHashCalculator hashCalculator, boolean enableProgress) {
|
||||||
|
this.fileScanner = Objects.requireNonNull(fileScanner);
|
||||||
|
this.hashCalculator = Objects.requireNonNull(hashCalculator);
|
||||||
|
this.enableProgress = enableProgress;
|
||||||
|
}
|
||||||
|
public DuplicateFinder(FileScanner fileScanner, FileHashCalculator hashCalculator) {
|
||||||
|
this(fileScanner, hashCalculator, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 查找重复文件
|
||||||
|
* @param rootDir 要扫描的根目录
|
||||||
|
* @return 按哈希值分组的重复文件列表
|
||||||
|
*/
|
||||||
|
public List<DuplicateGroup> findDuplicates(Path rootDir) throws IOException {
|
||||||
|
// 第一阶段:按文件大小分组
|
||||||
|
Map<Long, List<FileMetadata>> sizeGroups = groupFilesBySize(rootDir);
|
||||||
|
|
||||||
|
// 第二阶段:对可能重复的文件计算哈希
|
||||||
|
Map<String, List<FileMetadata>> hashGroups = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
sizeGroups.values().parallelStream()
|
||||||
|
.filter(group -> group.size() > 1) // 只处理可能重复的文件
|
||||||
|
.forEach(group -> group.parallelStream().forEach(file -> {
|
||||||
|
try {
|
||||||
|
String hash = hashCalculator.calculateHash(file.getPath());
|
||||||
|
file.setHash(hash);
|
||||||
|
hashGroups.computeIfAbsent(hash, k -> new ArrayList<>()).add(file);
|
||||||
|
} catch (IOException e) {
|
||||||
|
// 记录错误但继续处理其他文件
|
||||||
|
log.error("Failed to calculate file's hash: {}, {}", file.getPath(), e.getMessage());
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
|
||||||
|
// 第三阶段:构建结果
|
||||||
|
return hashGroups.values().stream()
|
||||||
|
.filter(group -> group.size() > 1)
|
||||||
|
.map(group -> new DuplicateGroup(
|
||||||
|
group.get(0).getHash(),
|
||||||
|
group.get(0).getSize(),
|
||||||
|
group
|
||||||
|
))
|
||||||
|
.sorted(Comparator.comparingLong(DuplicateGroup::size).reversed())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 按文件大小分组
|
||||||
|
*/
|
||||||
|
private Map<Long, List<FileMetadata>> groupFilesBySize(Path rootDir) throws IOException {
|
||||||
|
Map<Long, List<FileMetadata>> sizeGroups = new ConcurrentHashMap<>();
|
||||||
|
FileScanner.ProgressAwareListener listener = new FileScanner.ProgressAwareListener() {
|
||||||
|
@Override
|
||||||
|
public void onProgressUpdate(int current, int total) {
|
||||||
|
log.info("Scanning progress: {}/{} ", current, total);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onFileFound(Path file) {
|
||||||
|
try {
|
||||||
|
FileMetadata meta = new FileMetadata();
|
||||||
|
meta.setPath(file);
|
||||||
|
meta.setSize(Files.size(file));
|
||||||
|
sizeGroups.computeIfAbsent(meta.getSize(), k -> new ArrayList<>()).add(meta);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("Failed to get file's size: {}", file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public void onScanComplete() {}
|
||||||
|
@Override public void onError(Path file, Exception e) {
|
||||||
|
log.error("Error on scanning file: {}, {}", file, e.getMessage());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if(enableProgress)
|
||||||
|
fileScanner.scanWithProgress(rootDir, listener);
|
||||||
|
else
|
||||||
|
fileScanner.scan(rootDir, listener);
|
||||||
|
return sizeGroups;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
package top.r3944realms.docchecktoolrefactored.core;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 文件哈希计算策略接口
|
||||||
|
*/
|
||||||
|
public interface FileHashCalculator {
|
||||||
|
/**
|
||||||
|
* 计算文件哈希值
|
||||||
|
* @param file 要计算的文件路径
|
||||||
|
* @return 文件的哈希值字符串
|
||||||
|
*/
|
||||||
|
String calculateHash(Path file) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 默认实现使用MD5
|
||||||
|
*/
|
||||||
|
static FileHashCalculator defaultInstance() {
|
||||||
|
return new MD5HashCalculator();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,39 @@
|
||||||
|
package top.r3944realms.docchecktoolrefactored.core;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.security.MessageDigest;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MD5哈希计算实现
|
||||||
|
*/
|
||||||
|
public class MD5HashCalculator implements FileHashCalculator {
|
||||||
|
private static final int BUFFER_SIZE = 8192;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String calculateHash(Path file) throws IOException {
|
||||||
|
try {
|
||||||
|
MessageDigest md = MessageDigest.getInstance("MD5");
|
||||||
|
try (var is = Files.newInputStream(file)) {
|
||||||
|
byte[] buffer = new byte[BUFFER_SIZE];
|
||||||
|
int bytesRead;
|
||||||
|
while ((bytesRead = is.read(buffer)) != -1) {
|
||||||
|
md.update(buffer, 0, bytesRead);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bytesToHex(md.digest());
|
||||||
|
} catch (NoSuchAlgorithmException e) {
|
||||||
|
throw new RuntimeException("MD5算法不可用", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String bytesToHex(byte[] bytes) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (byte b : bytes) {
|
||||||
|
sb.append(String.format("%02x", b));
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -12,7 +12,15 @@ public interface FileScanner {
|
||||||
* @param rootPath 根路径
|
* @param rootPath 根路径
|
||||||
* @param listener 文件发现监听器
|
* @param listener 文件发现监听器
|
||||||
*/
|
*/
|
||||||
void scan(Path rootPath, FileScanListener listener);
|
default void scan(Path rootPath, FileScanListener listener) {
|
||||||
|
throw new UnsupportedOperationException("Please implement FileScanner, FileScannerListener.");
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* 扫描指定路径下的文件(带进度反馈)
|
||||||
|
*/
|
||||||
|
default void scanWithProgress(Path rootPath, ProgressAwareListener listener) {
|
||||||
|
throw new UnsupportedOperationException("Please implement FileScanner, ProgressAwareListener.");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 文件扫描监听器
|
* 文件扫描监听器
|
||||||
|
|
@ -38,4 +46,14 @@ public interface FileScanner {
|
||||||
*/
|
*/
|
||||||
void onError(Path file, Exception e);
|
void onError(Path file, Exception e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
interface ProgressAwareListener extends FileScanListener {
|
||||||
|
/**
|
||||||
|
* 进度更新回调
|
||||||
|
* @param current 当前已处理文件数
|
||||||
|
* @param total 预估总文件数(可能动态增长)
|
||||||
|
*/
|
||||||
|
void onProgressUpdate(int current, int total);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,35 @@
|
||||||
package top.r3944realms.docchecktoolrefactored.io.scanner;
|
package top.r3944realms.docchecktoolrefactored.io.scanner;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
import java.util.concurrent.ForkJoinPool;
|
import java.util.concurrent.ForkJoinPool;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The type Parallel file scanner.
|
* The type Parallel file scanner.
|
||||||
|
* <p>
|
||||||
|
* 这个没法正常使用,目前遇到的问题
|
||||||
|
* <p>
|
||||||
|
* * 目录遍历时遇到权限问题(静默失败)
|
||||||
|
* <p>
|
||||||
|
* * 存在符号链接循环
|
||||||
|
* <p>
|
||||||
|
* * 文件系统驱动程序卡死
|
||||||
|
* <p>
|
||||||
|
* * JVM与NTFS文件系统兼容性问题
|
||||||
*/
|
*/
|
||||||
public class ParallelFileScanner implements FileScanner,AutoCloseable {
|
@Slf4j
|
||||||
|
@Deprecated
|
||||||
|
public class ParallelFileScanner implements FileScanner ,AutoCloseable {
|
||||||
private final ForkJoinPool forkJoinPool;
|
private final ForkJoinPool forkJoinPool;
|
||||||
|
private volatile boolean cancelled = false;
|
||||||
/**
|
/**
|
||||||
* 使用默认并行度(CPU核心数)
|
* 使用默认并行度(CPU核心数)
|
||||||
*/
|
*/
|
||||||
|
|
@ -31,38 +48,90 @@ public class ParallelFileScanner implements FileScanner,AutoCloseable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scan(Path rootPath, FileScanListener listener) {
|
public void scan(Path rootPath, FileScanListener listener) {
|
||||||
forkJoinPool.submit(() -> {
|
scanInternal(rootPath, listener, null);
|
||||||
try (
|
}
|
||||||
Stream<Path> pathStream = Files.walk(rootPath)
|
@Override
|
||||||
.parallel() // 使用ForkJoinPool的并行流
|
public void scanWithProgress(Path rootPath, ProgressAwareListener listener) {
|
||||||
.filter(Files::isRegularFile)
|
// 先快速统计总文件数
|
||||||
){
|
long totalFiles = countFiles(rootPath);
|
||||||
pathStream.forEach(file -> {
|
scanInternal(rootPath, listener, totalFiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
private long countFiles(Path rootPath) {
|
||||||
|
try(Stream<Path> pathStream = Files.walk(rootPath)
|
||||||
|
.parallel()
|
||||||
|
.filter(Files::isRegularFile)) {
|
||||||
|
return pathStream.count();
|
||||||
|
} catch (IOException e) {
|
||||||
|
return -1; // 表示无法确定总数
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private void scanInternal(Path rootPath, FileScanListener listener, Long totalFiles) {
|
||||||
|
log.debug("ThreadPool Status: {}", forkJoinPool.isShutdown() ? "Closed" : "Running");
|
||||||
|
forkJoinPool.submit(() -> { // 方法没问题,可能就是在线程这里被卡死了
|
||||||
try {
|
try {
|
||||||
|
AtomicInteger processed = new AtomicInteger(0);
|
||||||
|
log.debug("Scanning files in {}", rootPath);
|
||||||
|
// 收集所有文件到List(避免Stream被重复使用)
|
||||||
|
@SuppressWarnings("resource") List<Path> files = Files.walk(rootPath)
|
||||||
|
.peek(p -> log.trace("visiting: {}", p))
|
||||||
|
.parallel()
|
||||||
|
.filter(p -> {
|
||||||
|
boolean isRegular = Files.isRegularFile(p);
|
||||||
|
if (!isRegular) {
|
||||||
|
log.debug("Skip non-regular : {} ", p);
|
||||||
|
}
|
||||||
|
return isRegular;
|
||||||
|
})
|
||||||
|
.peek(p -> log.trace("Found file: {}", p))
|
||||||
|
.toList(); // 立即消费Stream
|
||||||
|
if (files.isEmpty()) {
|
||||||
|
log.warn("No files found in directory: {}", rootPath);
|
||||||
|
} else log.debug("Found {} files in {}", files.size(), rootPath);
|
||||||
|
files.forEach(file -> {
|
||||||
|
if (cancelled) {
|
||||||
|
log.debug("Cancelled scanning file {}", file);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
log.debug("Handle file {}", file);
|
||||||
listener.onFileFound(file);
|
listener.onFileFound(file);
|
||||||
|
|
||||||
|
// 进度更新
|
||||||
|
if (listener instanceof ProgressAwareListener progressListener) {
|
||||||
|
int current = processed.incrementAndGet();
|
||||||
|
progressListener.onProgressUpdate(
|
||||||
|
current,
|
||||||
|
totalFiles != null ? totalFiles.intValue() : -1
|
||||||
|
);
|
||||||
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
log.debug("Error Handle file {}", file, e);
|
||||||
listener.onError(file, e);
|
listener.onError(file, e);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} catch (IOException e) {
|
if (!cancelled) {
|
||||||
throw new RuntimeException(e);
|
log.debug("Finished scanning files in {}", rootPath);
|
||||||
}
|
|
||||||
}
|
|
||||||
).join();
|
|
||||||
|
|
||||||
listener.onScanComplete();
|
listener.onScanComplete();
|
||||||
}
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
listener.onError(rootPath, e);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Unexpected error in scan thread", e);
|
||||||
|
listener.onError(rootPath, e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
log.debug("Task submitted to thread pool");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void cancel() {
|
||||||
|
cancelled = true;
|
||||||
|
forkJoinPool.shutdownNow();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
forkJoinPool.shutdown();
|
cancel();
|
||||||
try {
|
|
||||||
if (!forkJoinPool.awaitTermination(1, TimeUnit.SECONDS)) {
|
|
||||||
forkJoinPool.shutdownNow();
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
forkJoinPool.shutdownNow();
|
|
||||||
Thread.currentThread().interrupt();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,141 @@
|
||||||
|
package top.r3944realms.docchecktoolrefactored.io.scanner;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.AccessDeniedException;
|
||||||
|
import java.nio.file.DirectoryStream;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.concurrent.ForkJoinPool;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class RobustParallelScanner implements FileScanner, AutoCloseable {
|
||||||
|
private final ForkJoinPool forkJoinPool;
|
||||||
|
private volatile boolean cancelled = false;
|
||||||
|
private final int maxDepth;
|
||||||
|
public RobustParallelScanner(int maxDepth) {
|
||||||
|
this(Runtime.getRuntime().availableProcessors(), maxDepth);
|
||||||
|
}
|
||||||
|
public RobustParallelScanner(int parallelism, int maxDepth) {
|
||||||
|
this.forkJoinPool = new ForkJoinPool(parallelism);
|
||||||
|
this.maxDepth = maxDepth; // 防止无限递归
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void scan(Path rootPath, FileScanListener listener) {
|
||||||
|
scanInternal(rootPath, listener, null);
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public void scanWithProgress(Path rootPath, ProgressAwareListener listener) {
|
||||||
|
// 预扫描阶段:计算总文件数
|
||||||
|
AtomicLong totalFiles = new AtomicLong(0);
|
||||||
|
countFiles(rootPath, totalFiles);
|
||||||
|
scanInternal(rootPath, listener, totalFiles);
|
||||||
|
}
|
||||||
|
private void countFiles(Path dir, AtomicLong counter) {
|
||||||
|
if (cancelled) return;
|
||||||
|
|
||||||
|
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
|
||||||
|
for (Path path : stream) {
|
||||||
|
if (cancelled) return;
|
||||||
|
|
||||||
|
if (Files.isDirectory(path)) {
|
||||||
|
countFiles(path, counter);
|
||||||
|
} else if (Files.isRegularFile(path)) {
|
||||||
|
counter.incrementAndGet();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.warn("Failed to pre-scan: {}", dir, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private void scanInternal(Path rootPath, FileScanListener listener, AtomicLong totalFiles) {
|
||||||
|
try {
|
||||||
|
validateDirectory(rootPath);
|
||||||
|
|
||||||
|
forkJoinPool.submit(() -> {
|
||||||
|
try {
|
||||||
|
AtomicInteger processedFiles = new AtomicInteger(0);
|
||||||
|
scanDirectory(rootPath, listener, processedFiles, totalFiles, 0);
|
||||||
|
|
||||||
|
if (!cancelled) {
|
||||||
|
listener.onScanComplete();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
listener.onError(rootPath, e);
|
||||||
|
}
|
||||||
|
}).get(30, TimeUnit.SECONDS);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
log.error("Scan timeout: {}", rootPath, e);
|
||||||
|
forkJoinPool.shutdownNow();
|
||||||
|
listener.onError(rootPath, new TimeoutException("扫描超时30秒"));
|
||||||
|
} catch (Exception e) {
|
||||||
|
listener.onError(rootPath, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void scanDirectory(Path dir, FileScanListener listener,
|
||||||
|
AtomicInteger processedFiles, AtomicLong totalFiles, int currentDepth) {
|
||||||
|
if (cancelled || currentDepth > maxDepth) return;
|
||||||
|
|
||||||
|
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
|
||||||
|
for (Path path : stream) {
|
||||||
|
if (cancelled) break;
|
||||||
|
|
||||||
|
if (Files.isDirectory(path)) {
|
||||||
|
scanDirectory(path, listener, processedFiles, totalFiles, currentDepth + 1);
|
||||||
|
} else if (Files.isRegularFile(path)) {
|
||||||
|
processFile(path, listener, processedFiles, totalFiles);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
listener.onError(dir, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private void processFile(Path file, FileScanListener listener,
|
||||||
|
AtomicInteger processedFiles, AtomicLong totalFiles) {
|
||||||
|
if (cancelled) return;
|
||||||
|
|
||||||
|
try {
|
||||||
|
listener.onFileFound(file);
|
||||||
|
|
||||||
|
// 进度更新处理
|
||||||
|
if (listener instanceof ProgressAwareListener progressListener && totalFiles != null) {
|
||||||
|
int processed = processedFiles.incrementAndGet();
|
||||||
|
long total = totalFiles.get();
|
||||||
|
progressListener.onProgressUpdate(processed, (int)total);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
listener.onError(file, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void validateDirectory(Path path) throws IOException {
|
||||||
|
if (!Files.exists(path)) {
|
||||||
|
throw new FileNotFoundException(path.toString());
|
||||||
|
}
|
||||||
|
if (!Files.isReadable(path)) {
|
||||||
|
throw new AccessDeniedException(path.toString());
|
||||||
|
}
|
||||||
|
// 检查是否是挂载点
|
||||||
|
if (Files.getFileStore(path).type().equals("NTFS") &&
|
||||||
|
path.toString().contains("$")) {
|
||||||
|
throw new IOException("系统目录禁止访问: " + path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public void cancel() {
|
||||||
|
cancelled = true;
|
||||||
|
forkJoinPool.shutdownNow();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
cancel();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -123,7 +123,7 @@ public class SceneManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try get scene handler.
|
* Try to get scene handler.
|
||||||
*
|
*
|
||||||
* @param node the node
|
* @param node the node
|
||||||
* @param handler the handler
|
* @param handler the handler
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,27 @@
|
||||||
|
package top.r3944realms.docchecktoolrefactored.util;
|
||||||
|
|
||||||
|
/**
 * Holder for string constants used by the application.
 */
public final class StringUtil {
    /**
     * ASCII-art banner. Declared {@code final} so the shared constant cannot be reassigned.
     */
    public static final String NO_BUG = """

            _ooOoo_
            o8888888o
            88" . "88
            (| -_- |)
            O\\ = /O
            ____/`---'\\____
            .' \\\\| |// `.
            / \\\\||| : |||// \\
            / _||||| -:- |||||- \\
            | | \\\\\\ - /// | |
            | \\_| ''\\---/'' | |
            \\ .-\\__ `-` ___/-. /
            ___`. .' /--.--\\ `. . __
            ."" '< `.___\\_<|>_/___.' >'"".
            | | : `- \\`.;`\\ _ /`;.`/ - ` : | |
            \\ \\ `-. \\_ __\\ /__ _/ .-` / /
            ======`-.____`-.___\\_____/___.-`____.-'======
            `=---='
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            佛祖保佑 永无BUG
            """;

    /** Not instantiable: this class only exposes constants. */
    private StringUtil() {
    }
}
|
||||||
9
src/test/java/module-info.java
Normal file
9
src/test/java/module-info.java
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
module top.r3944realms.docchecktoolrefactored.test {
|
||||||
|
requires static lombok;
|
||||||
|
requires org.slf4j;
|
||||||
|
requires top.r3944realms.docchecktoolrefactored;
|
||||||
|
requires org.junit.jupiter.api;
|
||||||
|
|
||||||
|
exports top.r3944realms.docchecktoolrefactored.test;
|
||||||
|
opens top.r3944realms.docchecktoolrefactored.test;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,67 @@
|
||||||
|
package top.r3944realms.docchecktoolrefactored.test;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.RepeatedTest;
import top.r3944realms.docchecktoolrefactored.core.DuplicateFinder;
import top.r3944realms.docchecktoolrefactored.core.FileHashCalculator;
import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner;
import top.r3944realms.docchecktoolrefactored.io.scanner.RobustParallelScanner;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;

import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assumptions.assumeTrue;
|
||||||
|
|
||||||
|
public class DuplicateFinderPerformanceTest {
|
||||||
|
|
||||||
|
private static final String TEST_PATH = "D:/测试数据/JPG";
|
||||||
|
private FileScanner scanner;
|
||||||
|
private FileHashCalculator hashCalculator;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
void setUp() {
|
||||||
|
scanner = new RobustParallelScanner(20);
|
||||||
|
hashCalculator = FileHashCalculator.defaultInstance();
|
||||||
|
}
|
||||||
|
|
||||||
|
@RepeatedTest(5)
|
||||||
|
void compareFinderPerformance() throws IOException {
|
||||||
|
// Test finder WITHOUT pre-counting
|
||||||
|
long startWithoutPrecount = System.nanoTime();
|
||||||
|
DuplicateFinder finderWithoutPrecount = new DuplicateFinder(scanner, hashCalculator, false);
|
||||||
|
finderWithoutPrecount.findDuplicates(Paths.get(TEST_PATH));
|
||||||
|
long durationWithoutPrecount = System.nanoTime() - startWithoutPrecount;
|
||||||
|
|
||||||
|
// Test finder WITH pre-counting
|
||||||
|
long startWithPrecount = System.nanoTime();
|
||||||
|
DuplicateFinder finderWithPrecount = new DuplicateFinder(scanner, hashCalculator, true);
|
||||||
|
finderWithPrecount.findDuplicates(Paths.get(TEST_PATH));
|
||||||
|
long durationWithPrecount = System.nanoTime() - startWithPrecount;
|
||||||
|
|
||||||
|
// Convert to milliseconds
|
||||||
|
long msWithout = TimeUnit.NANOSECONDS.toMillis(durationWithoutPrecount);
|
||||||
|
long msWith = TimeUnit.NANOSECONDS.toMillis(durationWithPrecount);
|
||||||
|
|
||||||
|
System.out.println("Without pre-counting: " + msWithout + " ms");
|
||||||
|
System.out.println("With pre-counting: " + msWith + " ms");
|
||||||
|
|
||||||
|
// // Assert that pre-counting provides benefit
|
||||||
|
// assertTrue(msWith < msWithout * 1.2,
|
||||||
|
// "Pre-counting version should not be more than 20% slower");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void verifySameResults() throws IOException {
|
||||||
|
DuplicateFinder finder1 = new DuplicateFinder(scanner, hashCalculator, false);
|
||||||
|
var result1 = finder1.findDuplicates(Paths.get(TEST_PATH));
|
||||||
|
|
||||||
|
DuplicateFinder finder2 = new DuplicateFinder(scanner, hashCalculator, true);
|
||||||
|
var result2 = finder2.findDuplicates(Paths.get(TEST_PATH));
|
||||||
|
|
||||||
|
// // Verify both methods find the same duplicates
|
||||||
|
// assertTrue(result1.containsAll(result2) && result2.containsAll(result1),
|
||||||
|
// "Both methods should find the same duplicate files");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
package top.r3944realms.docchecktoolrefactored.test;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import top.r3944realms.docchecktoolrefactored.core.DuplicateFinder;
|
||||||
|
import top.r3944realms.docchecktoolrefactored.core.FileHashCalculator;
|
||||||
|
import top.r3944realms.docchecktoolrefactored.io.scanner.FileScanner;
|
||||||
|
import top.r3944realms.docchecktoolrefactored.io.scanner.RobustParallelScanner;
|
||||||
|
import top.r3944realms.docchecktoolrefactored.model.DuplicateGroup;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class DuplicateTest {
|
||||||
|
public static void main(String[] args) throws IOException {
|
||||||
|
// 创建组件
|
||||||
|
FileScanner scanner1 = new RobustParallelScanner(20);
|
||||||
|
FileHashCalculator hashCalculator1 = FileHashCalculator.defaultInstance();
|
||||||
|
FileScanner scanner2 = new RobustParallelScanner(20);
|
||||||
|
FileHashCalculator hashCalculator2 = FileHashCalculator.defaultInstance();
|
||||||
|
|
||||||
|
// 执行查重
|
||||||
|
DuplicateFinder finder = new DuplicateFinder(scanner1, hashCalculator1);
|
||||||
|
DuplicateFinder finder2 = new DuplicateFinder(scanner2, hashCalculator2, true);
|
||||||
|
List<DuplicateGroup> duplicates = finder.findDuplicates(Paths.get("H:\\nw0\\newworld(1)(1)"));
|
||||||
|
|
||||||
|
// 处理结果
|
||||||
|
duplicates.forEach(group -> {
|
||||||
|
log.info("发现重复文件组({} bytes):", group.size());
|
||||||
|
group.fileMetas().forEach(file ->
|
||||||
|
log.info(" {}", file.getPath())
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,32 @@
|
||||||
|
package top.r3944realms.docchecktoolrefactored.test;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class ParallelFileScannerTest {
|
||||||
|
public static void main(String[] args) throws IOException {
|
||||||
|
test1(Path.of("D:/测试数据/JPG"));
|
||||||
|
}
|
||||||
|
private static void test1(Path rootPath) throws IOException {
|
||||||
|
@SuppressWarnings("resource") List<Path> files = Files.walk(rootPath)
|
||||||
|
.peek(p -> log.trace("visiting: {}", p))
|
||||||
|
.parallel()
|
||||||
|
.filter(p -> {
|
||||||
|
boolean isRegular = Files.isRegularFile(p);
|
||||||
|
if (!isRegular) {
|
||||||
|
log.debug("Skip non-regular : {} ", p);
|
||||||
|
}
|
||||||
|
return isRegular;
|
||||||
|
})
|
||||||
|
.peek(p -> log.trace("Found file: {}", p))
|
||||||
|
.toList(); // 立即消费Stream
|
||||||
|
if (files.isEmpty()) {
|
||||||
|
log.warn("No files found in directory: {}", rootPath);
|
||||||
|
} else log.debug("Found {} files in {}", files.size(), rootPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue
Block a user