From e6dd1c39f8e7baaf0f90e8b5e3369e52da8a733f Mon Sep 17 00:00:00 2001 From: octopus_yan Date: Thu, 21 Nov 2024 16:57:10 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E4=BC=98=E5=8C=96=E5=AF=B9=E4=B8=8D?= =?UTF-8?q?=E8=A7=84=E6=95=B4CSV=E5=86=85=E5=AE=B9=E7=9A=84=E5=A4=84?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 8 + .../cn/octopusyan/dmt/model/WordCsvItem.java | 41 +- .../java/cn/octopusyan/dmt/utils/PBOUtil.java | 233 +++++---- .../dmt/utils/csv/CsvBaseReader.java | 307 ++++++++++++ .../octopusyan/dmt/utils/csv/CsvConfig.java | 120 +++++ .../cn/octopusyan/dmt/utils/csv/CsvData.java | 83 +++ .../octopusyan/dmt/utils/csv/CsvParser.java | 471 ++++++++++++++++++ .../dmt/utils/csv/CsvReadConfig.java | 118 +++++ .../octopusyan/dmt/utils/csv/CsvReader.java | 153 ++++++ .../cn/octopusyan/dmt/utils/csv/CsvRow.java | 262 ++++++++++ .../dmt/utils/csv/CsvRowHandler.java | 18 + .../cn/octopusyan/dmt/utils/csv/CsvUtil.java | 144 ++++++ .../dmt/viewModel/MainViewModel.java | 8 + src/main/java/module-info.java | 1 + 14 files changed, 1871 insertions(+), 96 deletions(-) create mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvBaseReader.java create mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvConfig.java create mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvData.java create mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvParser.java create mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvReadConfig.java create mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvReader.java create mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvRow.java create mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvRowHandler.java create mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvUtil.java diff --git a/pom.xml b/pom.xml index e5f9d82..309edcb 100644 --- a/pom.xml +++ b/pom.xml @@ -130,6 +130,14 @@ ${jackson.version} + + + + cn.hutool + 
hutool-core + 5.8.33 + + org.kordamp.ikonli diff --git a/src/main/java/cn/octopusyan/dmt/model/WordCsvItem.java b/src/main/java/cn/octopusyan/dmt/model/WordCsvItem.java index 23efe86..aa60e95 100644 --- a/src/main/java/cn/octopusyan/dmt/model/WordCsvItem.java +++ b/src/main/java/cn/octopusyan/dmt/model/WordCsvItem.java @@ -15,18 +15,49 @@ import java.io.File; public class WordCsvItem extends WordItem { /** - * 开始下标(csv繁体 + * 是否规整(有些翻译列数不完整,无法正常分割) */ - private Integer indexTrad; + private boolean regular; + + /** + * csv中Language列文本 + *

+ * 当{@code regular}为{@code false}时,用于获取于csv原文内容,用于拼接格式化文本 + */ + private String header; /** * 原文(获取于csv繁体位置,用于替换翻译文本 */ private String originalTrad; - public WordCsvItem(File file, Integer lines, Integer index, String original, String chinese, Integer indexTrad, String originalTrad) { - super(file, lines, index, original, chinese); - this.indexTrad = indexTrad; + /** + * csv(规整)文本对象 + * + * @param file 文件 + * @param lines 行数 + * @param original 原文 + * @param chinese 中文位置对应的文本 + * @param originalTrad 繁体中文位置对应的文本 + */ + public WordCsvItem(File file, Integer lines, String original, String chinese, String originalTrad) { + super(file, lines, 0, original, chinese); + this.regular = true; this.originalTrad = originalTrad; } + + /** + * csv(不规整)文本对象 + *

+ * + * @param file 文件 + * @param lines 行数 + * @param header Language列对应名称,用于拼接格式化文本 + * @param original 原文 + */ + public WordCsvItem(File file, Integer lines, String header, String original) { + super(file, lines, null, original, ""); + this.regular = false; + this.header = header; + } } diff --git a/src/main/java/cn/octopusyan/dmt/utils/PBOUtil.java b/src/main/java/cn/octopusyan/dmt/utils/PBOUtil.java index 5dad3b7..60f23c0 100644 --- a/src/main/java/cn/octopusyan/dmt/utils/PBOUtil.java +++ b/src/main/java/cn/octopusyan/dmt/utils/PBOUtil.java @@ -5,6 +5,7 @@ import cn.octopusyan.dmt.common.config.Context; import cn.octopusyan.dmt.common.util.ProcessesUtil; import cn.octopusyan.dmt.model.WordCsvItem; import cn.octopusyan.dmt.model.WordItem; +import cn.octopusyan.dmt.utils.csv.*; import cn.octopusyan.dmt.view.ConsoleLog; import org.apache.commons.io.FileUtils; import org.apache.commons.io.LineIterator; @@ -15,7 +16,10 @@ import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.regex.Matcher; @@ -154,7 +158,7 @@ public class PBOUtil { * * @param wordFileMap 文件对应文本map */ - public static void writeWords(Map> wordFileMap) { + public static void writeWords(Map> wordFileMap) throws IOException { for (Map.Entry> entry : wordFileMap.entrySet()) { @@ -165,53 +169,35 @@ public class PBOUtil { // 需要转bin文件时,写入bak目录下cpp文件 boolean hasBin = new File(outFilePath(file, ".bin")).exists(); + // 写入TMP下文件 String writePath = file.getAbsolutePath().replace(Constants.BAK_DIR_PATH, Constants.TMP_DIR_PATH); File writeFile = hasBin ? 
file : new File(writePath); - AtomicInteger lineIndex = new AtomicInteger(0); - List lines = new ArrayList<>(); + List lines; - consoleLog.info("正在写入文件[{}]", writeFile.getAbsolutePath()); + consoleLog.info("正在写入文件 => {}", writeFile.getAbsolutePath()); try (LineIterator it = FileUtils.lineIterator(file, StandardCharsets.UTF_8.name())) { - while (it.hasNext()) { - lineIndex.addAndGet(1); - String line = it.next(); - WordItem word = wordMap.get(lineIndex.get()); - - // 当前行是否有需要替换的文本 - if (word != null && line.contains(word.getOriginal())) { - - if (word instanceof WordCsvItem csvItem) { - // 繁体部分 - String trad = line.substring(csvItem.getIndexTrad(), csvItem.getIndex()); - // 简体部分 - String simp = line.substring(csvItem.getIndex()); - // 拼接 - line = line.substring(0, csvItem.getIndexTrad()) - // Pattern.quote 处理转义字符 - + trad.replaceFirst(Pattern.quote(csvItem.getOriginalTrad()), csvItem.getChinese()) - + simp.replaceFirst(Pattern.quote(csvItem.getOriginal()), csvItem.getChinese()); - } else { - line = line.substring(0, word.getIndex()) + - line.substring(word.getIndex()).replace(word.getOriginal(), word.getChinese()); - } - } - - // 缓存行内容 - lines.add(line); + if (FILE_NAME_STRING_TABLE.equals(file.getName())) { + // 写入 CSV 文件 + lines = writeCsv(file, it, wordMap); + } else { + // 写入 CPP 或 layout 文件 + lines = writeOther(it, wordMap); } } catch (IOException e) { consoleLog.error(STR."文件[\{file.getAbsoluteFile()}]读取出错", e); + throw e; } + // 写入文件 try { - // 写入文件 String charsets = writeFile.getName().endsWith(".layout") ? 
FileUtil.getCharsets(writeFile) : StandardCharsets.UTF_8.name(); FileUtils.writeLines(writeFile, charsets, lines); } catch (IOException e) { - consoleLog.error(STR."文件(\{writeFile.getAbsoluteFile()})写入失败", e); + consoleLog.error(STR."文件[\{file.getAbsoluteFile()}]写入出错", e); + throw e; } // CPP转BIN (覆盖TMP下BIN文件) @@ -219,6 +205,99 @@ public class PBOUtil { } } + /** + * 写入 CPP 或 layout 文件 + * + * @param it 行遍历器 + * @param wordMap 替换文本map + * @return 待写入行文本列表 + */ + private static List writeOther(LineIterator it, Map wordMap) { + AtomicInteger lineIndex = new AtomicInteger(0); + List lines = new ArrayList<>(); + while (it.hasNext()) { + lineIndex.addAndGet(1); + + String line = it.next(); + WordItem word = wordMap.get(lineIndex.get()); + + if (word != null && line.contains(word.getOriginal())) { + line = line.substring(0, word.getIndex()) + + line.substring(word.getIndex()).replace(word.getOriginal(), word.getChinese()); + } + + lines.add(line); + } + return lines; + } + + + /** + * 写入 CSV 文件 + * + * @param file + * @param it 行遍历器 + * @param wordMap 替换文本map + * @return 待写入行文本列表 + */ + private static List writeCsv(File file, LineIterator it, Map wordMap) { + AtomicInteger lineIndex = new AtomicInteger(0); + List lines = new ArrayList<>(); + + CsvReader reader = CsvUtil.getReader(CsvReadConfig.defaultConfig()); + CsvData data = reader.read(file); + var rowMap = data.getRows().stream() + .collect(Collectors.toMap(CsvRow::getOriginalLineNumber, Function.identity())); + + while (it.hasNext()) { + lineIndex.addAndGet(1); + String line = it.next(); + + WordCsvItem word = (WordCsvItem) wordMap.get(lineIndex.get()); + + // 以 , 开头的行(视为内容带换行符,跳过) + // ,,开头视为空值行(不跳过,尽量还原文本结构 + if (word == null && line.startsWith(",") && !line.startsWith(",,")) { + continue; + } + + // 判断当前行是否有需要替换的文本 + if (word != null && line.contains(word.getOriginal())) { + + // 是否规整(可简单读取的) + if (word.isRegular()) { + CsvRow strings = rowMap.get(Integer.valueOf(lineIndex.get()).longValue() - 1L); + // 替换翻译文本 + 
strings.set(11, word.getChinese());// 繁体 + strings.set(14, word.getChinese());// 简体 + line = strings.stream().map(item -> STR."\"\{item}\"").collect(Collectors.joining(",")); + + // 处理带换行符文本 + var length = line.split("\r\n|\r|\n").length; + for (int i = 1; i < length; i++) { + lineIndex.addAndGet(1); + String next = it.next(); + consoleLog.debug(STR."next => \"\{next}\""); + } + } else { + // 不规整的直接原文填充 + // Language,original,english,czech,german,russian,polish,hungarian,italian,spanish,french,chinese,japanese,portuguese,chinesesimp + StringBuilder sb = new StringBuilder(); + sb.append("\"").append(word.getHeader()).append("\""); + for (int i = 1; i < 15; i++) { + String str = (i == 11 || i == 14) ? word.getChinese() : word.getOriginal(); + sb.append(",\"").append(str).append("\""); + } + line = sb.toString(); + } + } + + lines.add(line); + } + + return lines; + } + /** * 查找文件内可翻译文本 * @@ -251,7 +330,7 @@ public class PBOUtil { } // CSV if (FILE_NAME_STRING_TABLE.equals(file.getName())) { - return findWordByCSV(file, it); + return findWordByCSV(file); } // layout if (file.getName().endsWith(".layout")) { @@ -268,67 +347,39 @@ public class PBOUtil { return Collections.emptyList(); } - /** - * 从csv文件中读取可翻译文本 - * - * @param file csv文件 - * @param it 行内容遍历器 - * @return 可翻译文本列表 - */ - private static List findWordByCSV(File file, LineIterator it) { + + private static List findWordByCSV(File file) { ArrayList wordItems = new ArrayList<>(); - AtomicInteger lines = new AtomicInteger(0); - int index = -1; - int indexTrad = -1; - int indexOriginal = -1; - String line; - while (it.hasNext()) { - line = it.next(); - boolean contains = line.contains("\""); - String delimit = contains ? 
"\",\"" : ","; - List split = Arrays.stream(line.split(delimit)).toList(); - lines.addAndGet(1); - if (lines.get() == 1) { - for (int i = 0; i < split.size(); i++) { - String colName = StringUtils.lowerCase(split.get(i)); - if (colName.contains("original")) { - indexOriginal = i; - } else if (colName.contains("chinesesimp")) { - index = i; - } else if (colName.contains("chinese")) { - indexTrad = i; - } - } - continue; - } - - if (index < split.size()) { - // 中文内容 - String chinese = split.get(index).replaceAll("\",?", ""); - // 已有中文翻译则跳过 - if (containsChinese(chinese)) - continue; - - // 原文 - String original = split.get(indexOriginal).replaceAll("\"", ""); - // 繁体内容 - String originalTrad = split.get(indexTrad).replaceAll("\"", ""); - // 开始下标 - String searchSr = contains ? "\",\"" : ","; - int startIndex = StringUtils.ordinalIndexOf(line, searchSr, index); - int startIndexTrad = StringUtils.ordinalIndexOf(line, searchSr, indexTrad); - - // 如果带引号 - startIndex += (contains ? 3 : 1); - startIndexTrad += (contains ? 
3 : 1); - - // 添加单词 - if (original.length() > 1) { - wordItems.add(new WordCsvItem(file, lines.get(), startIndex, chinese, "", startIndexTrad, originalTrad)); - } + CsvReadConfig config = CsvReadConfig.defaultConfig().setTrimField(true).setContainsHeader(true); + CsvReader reader = CsvUtil.getReader(config); + CsvData data = reader.read(file); + + // 读取CSV + List rows = data.getRows(); + for (CsvRow row : rows) { + WordItem item; + int lines = (int) (row.getOriginalLineNumber() + 1); + + String original = row.get(1); + // 跳过原文为空的行 + if (StringUtils.isEmpty(original)) continue; + + // 是否可格式化读取 + if (row.size() == 15) { + + String chinese = row.get(11); + + // 已有中文翻译,则跳过 + if (containsChinese(chinese)) continue; + + item = new WordCsvItem(file, lines, original, chinese, row.get(14)); + } else { + item = new WordCsvItem(file, lines, row.getFirst(), original); } + wordItems.add(item); } + return wordItems; } diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvBaseReader.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvBaseReader.java new file mode 100644 index 0000000..33b2937 --- /dev/null +++ b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvBaseReader.java @@ -0,0 +1,307 @@ +package cn.octopusyan.dmt.utils.csv; + +import cn.hutool.core.io.FileUtil; +import cn.hutool.core.io.IORuntimeException; +import cn.hutool.core.io.IoUtil; +import cn.hutool.core.lang.Assert; +import cn.hutool.core.util.CharsetUtil; +import cn.hutool.core.util.ObjectUtil; + +import java.io.File; +import java.io.Reader; +import java.io.Serializable; +import java.io.StringReader; +import java.nio.charset.Charset; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * CSV文件读取器基础类,提供灵活的文件、路径中的CSV读取,一次构造可多次调用读取不同数据,参考:FastCSV + * + * @author Looly + * @since 5.0.4 + */ +public class CsvBaseReader implements Serializable { + private static final long serialVersionUID = 1L; + + /** + * 默认编码 + */ + protected 
static final Charset DEFAULT_CHARSET = CharsetUtil.CHARSET_UTF_8; + + private final CsvReadConfig config; + + //--------------------------------------------------------------------------------------------- Constructor start + + /** + * 构造,使用默认配置项 + */ + public CsvBaseReader() { + this(null); + } + + /** + * 构造 + * + * @param config 配置项 + */ + public CsvBaseReader(CsvReadConfig config) { + this.config = ObjectUtil.defaultIfNull(config, CsvReadConfig::defaultConfig); + } + //--------------------------------------------------------------------------------------------- Constructor end + + /** + * 设置字段分隔符,默认逗号',' + * + * @param fieldSeparator 字段分隔符,默认逗号',' + */ + public void setFieldSeparator(char fieldSeparator) { + this.config.setFieldSeparator(fieldSeparator); + } + + /** + * 设置 文本分隔符,文本包装符,默认双引号'"' + * + * @param textDelimiter 文本分隔符,文本包装符,默认双引号'"' + */ + public void setTextDelimiter(char textDelimiter) { + this.config.setTextDelimiter(textDelimiter); + } + + /** + * 设置是否首行做为标题行,默认false + * + * @param containsHeader 是否首行做为标题行,默认false + */ + public void setContainsHeader(boolean containsHeader) { + this.config.setContainsHeader(containsHeader); + } + + /** + * 设置是否跳过空白行,默认true + * + * @param skipEmptyRows 是否跳过空白行,默认true + */ + public void setSkipEmptyRows(boolean skipEmptyRows) { + this.config.setSkipEmptyRows(skipEmptyRows); + } + + /** + * 设置每行字段个数不同时是否抛出异常,默认false + * + * @param errorOnDifferentFieldCount 每行字段个数不同时是否抛出异常,默认false + */ + public void setErrorOnDifferentFieldCount(boolean errorOnDifferentFieldCount) { + this.config.setErrorOnDifferentFieldCount(errorOnDifferentFieldCount); + } + + /** + * 读取CSV文件,默认UTF-8编码 + * + * @param file CSV文件 + * @return {@link CsvData},包含数据列表和行信息 + * @throws IORuntimeException IO异常 + */ + public CsvData read(File file) throws IORuntimeException { + return read(file, DEFAULT_CHARSET); + } + + /** + * 从字符串中读取CSV数据 + * + * @param csvStr CSV字符串 + * @return {@link CsvData},包含数据列表和行信息 + */ + public CsvData readFromStr(String csvStr) { 
+ return read(new StringReader(csvStr)); + } + + /** + * 从字符串中读取CSV数据 + * + * @param csvStr CSV字符串 + * @param rowHandler 行处理器,用于一行一行的处理数据 + */ + public void readFromStr(String csvStr, CsvRowHandler rowHandler) { + read(parse(new StringReader(csvStr)), true, rowHandler); + } + + + /** + * 读取CSV文件 + * + * @param file CSV文件 + * @param charset 文件编码,默认系统编码 + * @return {@link CsvData},包含数据列表和行信息 + * @throws IORuntimeException IO异常 + */ + public CsvData read(File file, Charset charset) throws IORuntimeException { + return read(Objects.requireNonNull(file.toPath(), "file must not be null"), charset); + } + + /** + * 读取CSV文件,默认UTF-8编码 + * + * @param path CSV文件 + * @return {@link CsvData},包含数据列表和行信息 + * @throws IORuntimeException IO异常 + */ + public CsvData read(Path path) throws IORuntimeException { + return read(path, DEFAULT_CHARSET); + } + + /** + * 读取CSV文件 + * + * @param path CSV文件 + * @param charset 文件编码,默认系统编码 + * @return {@link CsvData},包含数据列表和行信息 + * @throws IORuntimeException IO异常 + */ + public CsvData read(Path path, Charset charset) throws IORuntimeException { + Assert.notNull(path, "path must not be null"); + return read(FileUtil.getReader(path, charset)); + } + + /** + * 从Reader中读取CSV数据,读取后关闭Reader + * + * @param reader Reader + * @return {@link CsvData},包含数据列表和行信息 + * @throws IORuntimeException IO异常 + */ + public CsvData read(Reader reader) throws IORuntimeException { + return read(reader, true); + } + + /** + * 从Reader中读取CSV数据 + * + * @param reader Reader + * @param close 读取结束是否关闭Reader + * @return {@link CsvData},包含数据列表和行信息 + * @throws IORuntimeException IO异常 + */ + public CsvData read(Reader reader, boolean close) throws IORuntimeException { + final CsvParser csvParser = parse(reader); + final List rows = new ArrayList<>(); + read(csvParser, close, rows::add); + final List header = config.headerLineNo > -1 ? csvParser.getHeader() : null; + + return new CsvData(header, rows); + } + + /** + * 从Reader中读取CSV数据,结果为Map,读取后关闭Reader。
+ * 此方法默认识别首行为标题行。 + * + * @param reader Reader + * @return {@link CsvData},包含数据列表和行信息 + * @throws IORuntimeException IO异常 + */ + public List> readMapList(Reader reader) throws IORuntimeException { + // 此方法必须包含标题 + this.config.setContainsHeader(true); + + final List> result = new ArrayList<>(); + read(reader, (row) -> result.add(row.getFieldMap())); + return result; + } + + /** + * 从Reader中读取CSV数据并转换为Bean列表,读取后关闭Reader。
+ * 此方法默认识别首行为标题行。 + * + * @param Bean类型 + * @param reader Reader + * @param clazz Bean类型 + * @return Bean列表 + */ + public List read(Reader reader, Class clazz) { + // 此方法必须包含标题 + this.config.setContainsHeader(true); + + final List result = new ArrayList<>(); + read(reader, (row) -> result.add(row.toBean(clazz))); + return result; + } + + /** + * 从字符串中读取CSV数据并转换为Bean列表,读取后关闭Reader。
+ * 此方法默认识别首行为标题行。 + * + * @param Bean类型 + * @param csvStr csv字符串 + * @param clazz Bean类型 + * @return Bean列表 + */ + public List read(String csvStr, Class clazz) { + // 此方法必须包含标题 + this.config.setContainsHeader(true); + + final List result = new ArrayList<>(); + read(new StringReader(csvStr), (row) -> result.add(row.toBean(clazz))); + return result; + } + + /** + * 从Reader中读取CSV数据,读取后关闭Reader + * + * @param reader Reader + * @param rowHandler 行处理器,用于一行一行的处理数据 + * @throws IORuntimeException IO异常 + */ + public void read(Reader reader, CsvRowHandler rowHandler) throws IORuntimeException { + read(reader, true, rowHandler); + } + + /** + * 从Reader中读取CSV数据,读取后关闭Reader + * + * @param reader Reader + * @param close 读取结束是否关闭Reader + * @param rowHandler 行处理器,用于一行一行的处理数据 + * @throws IORuntimeException IO异常 + */ + public void read(Reader reader, boolean close, CsvRowHandler rowHandler) throws IORuntimeException { + read(parse(reader), close, rowHandler); + } + + //--------------------------------------------------------------------------------------------- Private method start + + /** + * 读取CSV数据,读取后关闭Parser + * + * @param csvParser CSV解析器 + * @param close 读取结束是否关闭{@link CsvParser} + * @param rowHandler 行处理器,用于一行一行的处理数据 + * @throws IORuntimeException IO异常 + * @since 5.0.4 + */ + private void read(CsvParser csvParser, boolean close, CsvRowHandler rowHandler) throws IORuntimeException { + try { + while (csvParser.hasNext()) { + rowHandler.handle(csvParser.next()); + } + } finally { + if(close){ + IoUtil.close(csvParser); + } + } + } + + /** + * 构建 {@link CsvParser} + * + * @param reader Reader + * @return CsvParser + * @throws IORuntimeException IO异常 + */ + protected CsvParser parse(Reader reader) throws IORuntimeException { + return new CsvParser(reader, this.config); + } + //--------------------------------------------------------------------------------------------- Private method start +} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvConfig.java 
b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvConfig.java new file mode 100644 index 0000000..178a8cc --- /dev/null +++ b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvConfig.java @@ -0,0 +1,120 @@ +package cn.octopusyan.dmt.utils.csv; + +import cn.hutool.core.text.csv.CsvWriter; +import cn.hutool.core.util.CharUtil; + +import java.io.Serializable; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * CSV基础配置项,此配置项可用于读取和写出CSV,定义了包括字段分隔符、文本包装符等符号 + * + * @param 继承子类类型,用于this返回 + * @author looly + * @since 4.0.5 + */ +@SuppressWarnings("unchecked") +public class CsvConfig> implements Serializable { + private static final long serialVersionUID = -8069578249066158459L; + + /** + * 字段分隔符,默认逗号',' + */ + protected char fieldSeparator = CharUtil.COMMA; + /** + * 文本包装符,默认双引号'"' + */ + protected char textDelimiter = CharUtil.DOUBLE_QUOTES; + /** + * 注释符号,用于区分注释行,默认'#' + */ + protected Character commentCharacter = '#'; + /** + * 标题别名 + */ + protected Map headerAlias = new LinkedHashMap<>(); + + /** + * 设置字段分隔符,默认逗号',' + * + * @param fieldSeparator 字段分隔符,默认逗号',' + * @return this + */ + public T setFieldSeparator(final char fieldSeparator) { + this.fieldSeparator = fieldSeparator; + return (T) this; + } + + /** + * 设置 文本分隔符,文本包装符,默认双引号'"' + * + * @param textDelimiter 文本分隔符,文本包装符,默认双引号'"' + * @return this + */ + public T setTextDelimiter(char textDelimiter) { + this.textDelimiter = textDelimiter; + return (T) this; + } + + /** + * 设置注释无效
+ * 当写出CSV时,{@link CsvWriter#writeComment(String)}将抛出异常
+ * 当读取CSV时,注释行按照正常行读取 + * + * @return this + * @since 5.7.14 + */ + public T disableComment() { + return setCommentCharacter(null); + } + + /** + * 设置 注释符号,用于区分注释行,{@code null}表示忽略注释 + * + * @param commentCharacter 注释符号,用于区分注释行 + * @return this + * @since 5.5.7 + */ + public T setCommentCharacter(Character commentCharacter) { + this.commentCharacter = commentCharacter; + return (T) this; + } + + /** + * 设置标题行的别名Map + * + * @param headerAlias 别名Map + * @return this + * @since 5.7.10 + */ + public T setHeaderAlias(Map headerAlias) { + this.headerAlias = headerAlias; + return (T) this; + } + + /** + * 增加标题别名 + * + * @param header 标题 + * @param alias 别名 + * @return this + * @since 5.7.10 + */ + public T addHeaderAlias(String header, String alias) { + this.headerAlias.put(header, alias); + return (T) this; + } + + /** + * 去除标题别名 + * + * @param header 标题 + * @return this + * @since 5.7.10 + */ + public T removeHeaderAlias(String header) { + this.headerAlias.remove(header); + return (T) this; + } +} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvData.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvData.java new file mode 100644 index 0000000..e2196bc --- /dev/null +++ b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvData.java @@ -0,0 +1,83 @@ +package cn.octopusyan.dmt.utils.csv; + +import java.io.Serializable; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * CSV数据,包括头部信息和行数据,参考:FastCSV + * + * @author Looly + */ +public class CsvData implements Iterable, Serializable { + private static final long serialVersionUID = 1L; + + private final List header; + private final List rows; + + /** + * 构造 + * + * @param header 头信息, 可以为null + * @param rows 行 + */ + public CsvData(final List header, final List rows) { + this.header = header; + this.rows = rows; + } + + /** + * 总行数 + * + * @return 总行数 + */ + public int getRowCount() { + return this.rows.size(); + } + + /** + * 获取头信息列表,如果无头信息为{@code Null},返回列表为只读列表 + * + * @return 
the header row - might be {@code null} if no header exists + */ + public List getHeader() { + if(null == this.header){ + return null; + } + return Collections.unmodifiableList(this.header); + } + + /** + * 获取指定行,从0开始 + * + * @param index 行号 + * @return 行数据 + * @throws IndexOutOfBoundsException if index is out of range + */ + public CsvRow getRow(final int index) { + return this.rows.get(index); + } + + /** + * 获取所有行 + * + * @return 所有行 + */ + public List getRows() { + return this.rows; + } + + @Override + public Iterator iterator() { + return this.rows.iterator(); + } + + @Override + public String toString() { + return "CsvData{" + + "header=" + header + + ", rows=" + rows + + '}'; + } +} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvParser.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvParser.java new file mode 100644 index 0000000..99ed94a --- /dev/null +++ b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvParser.java @@ -0,0 +1,471 @@ +package cn.octopusyan.dmt.utils.csv; + +import ch.qos.logback.core.CoreConstants; +import cn.hutool.core.collection.ComputeIter; +import cn.hutool.core.io.IORuntimeException; +import cn.hutool.core.io.IoUtil; +import cn.hutool.core.map.MapUtil; +import cn.hutool.core.text.StrBuilder; +import cn.hutool.core.util.CharUtil; +import cn.hutool.core.util.ObjectUtil; +import cn.hutool.core.util.StrUtil; + +import java.io.Closeable; +import java.io.IOException; +import java.io.Reader; +import java.io.Serializable; +import java.util.*; + +/** + * CSV行解析器,参考:FastCSV + * + * @author Looly + */ +public final class CsvParser extends ComputeIter implements Closeable, Serializable { + private static final long serialVersionUID = 1L; + + private static final int DEFAULT_ROW_CAPACITY = 10; + + private final Reader reader; + private final CsvReadConfig config; + + private final Buffer buf = new Buffer(IoUtil.DEFAULT_LARGE_BUFFER_SIZE); + /** + * 前一个特殊分界字符 + */ + private int preChar = -1; + /** + * 是否在引号包装内 + */ + private boolean 
inQuotes; + /** + * 当前读取字段 + */ + private final StrBuilder currentField = new StrBuilder(512); + + /** + * 标题行 + */ + private CsvRow header; + /** + * 当前行号 + */ + private long lineNo = -1; + /** + * 引号内的行数 + */ + private long inQuotesLineCount; + /** + * 第一行字段数,用于检查每行字段数是否一致 + */ + private int firstLineFieldCount = -1; + /** + * 最大字段数量,用于初始化行,减少扩容 + */ + private int maxFieldCount; + /** + * 是否读取结束 + */ + private boolean finished; + + /** + * CSV解析器 + * + * @param reader Reader + * @param config 配置,null则为默认配置 + */ + public CsvParser(final Reader reader, CsvReadConfig config) { + this.reader = Objects.requireNonNull(reader, "reader must not be null"); + this.config = ObjectUtil.defaultIfNull(config, CsvReadConfig::defaultConfig); + } + + /** + * 获取头部字段列表,如果headerLineNo < 0,抛出异常 + * + * @return 头部列表 + * @throws IllegalStateException 如果不解析头部或者没有调用nextRow()方法 + */ + public List getHeader() { + if (config.headerLineNo < 0) { + throw new IllegalStateException("No header available - header parsing is disabled"); + } + if (lineNo < config.beginLineNo) { + throw new IllegalStateException("No header available - call nextRow() first"); + } + return header.getRawList(); + } + + @Override + protected CsvRow computeNext() { + return nextRow(); + } + + /** + * 读取下一行数据 + * + * @return CsvRow + * @throws IORuntimeException IO读取异常 + */ + public CsvRow nextRow() throws IORuntimeException { + List currentFields; + int fieldCount; + while (false == finished) { + currentFields = readLine(); + fieldCount = currentFields.size(); + if (fieldCount < 1) { + // 空List表示读取结束 + break; + } + + // 读取范围校验 + if(lineNo < config.beginLineNo){ + // 未达到读取起始行,继续 + continue; + } + if(lineNo > config.endLineNo){ + // 超出结束行,读取结束 + break; + } + + // 跳过空行 + if (config.skipEmptyRows && fieldCount == 1 && currentFields.get(0).isEmpty()) { + // [""]表示空行 + continue; + } + + // 检查每行的字段数是否一致 + if (config.errorOnDifferentFieldCount) { + if (firstLineFieldCount < 0) { + firstLineFieldCount = fieldCount; + } else if 
(fieldCount != firstLineFieldCount) { + throw new IORuntimeException(String.format("Line %d has %d fields, but first line has %d fields", lineNo, fieldCount, firstLineFieldCount)); + } + } + + // 记录最大字段数 + if (fieldCount > maxFieldCount) { + maxFieldCount = fieldCount; + } + + //初始化标题 + if (lineNo == config.headerLineNo && null == header) { + initHeader(currentFields); + // 作为标题行后,此行跳过,下一行做为第一行 + continue; + } + + return new CsvRow(lineNo, null == header ? null : header.headerMap, currentFields); + } + + return null; + } + + /** + * 当前行做为标题行 + * + * @param currentFields 当前行字段列表 + */ + private void initHeader(final List currentFields) { + final Map localHeaderMap = new LinkedHashMap<>(currentFields.size()); + for (int i = 0; i < currentFields.size(); i++) { + String field = currentFields.get(i); + if (MapUtil.isNotEmpty(this.config.headerAlias)) { + // 自定义别名 + field = ObjectUtil.defaultIfNull(this.config.headerAlias.get(field), field); + } + if (StrUtil.isNotEmpty(field) && false == localHeaderMap.containsKey(field)) { + localHeaderMap.put(field, i); + } + } + + header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields)); + } + + /** + * 读取一行数据,如果读取结束,返回size为0的List
+ * 空行是size为1的List,唯一元素是"" + * + *

+ * 行号要考虑注释行和引号包装的内容中的换行 + *

+     *
+     * @return the fields of one row
+     * @throws IORuntimeException on I/O error
+     */
+    private List<String> readLine() throws IORuntimeException {
+        // Correct the line number.
+        // When one row spans several physical lines the first line's number is recorded; before reading the next row the extra lines must be added.
+        if(inQuotesLineCount > 0){
+            this.lineNo += this.inQuotesLineCount;
+            this.inQuotesLineCount = 0;
+        }
+
+        final List<String> currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);
+
+        final StrBuilder currentField = this.currentField;
+        final Buffer buf = this.buf;
+        int preChar = this.preChar;// previous special delimiter character
+        int copyLen = 0; // number of chars pending copy
+        boolean inComment = false;
+
+        while (true) {
+            if (false == buf.hasRemaining()) {
+                // this buffer is exhausted, read the next segment
+                if (copyLen > 0) {
+                    buf.appendTo(currentField, copyLen);
+                    // no mark() needed here, read() resets the mark
+                }
+                if (buf.read(this.reader) < 0) {
+                    // end of CSV input
+                    finished = true;
+
+                    if (currentField.hasContent() || preChar == config.fieldSeparator) {
+                        // the remainder becomes one field
+                        addField(currentFields, currentField.toStringAndReset());
+                    }
+                    break;
+                }
+
+                // reset
+                copyLen = 0;
+            }
+
+            final char c = buf.get();
+
+            // comment-line marker
+            if(preChar < 0 || preChar == CharUtil.CR || preChar == CharUtil.LF){
+                // a comment starts when the first char of a line is the configured comment char and lasts until a line break
+                // line start is either preChar < 0 (start of text) or the char right after a line break
+                // issue#IA8WE0: a comment char inside the text delimiter is treated as an ordinary char
+                if((false == inQuotes) && null != this.config.commentCharacter && c == this.config.commentCharacter){
+                    inComment = true;
+                }
+            }
+            // comment-line handling
+            if(inComment){
+                if (c == CharUtil.CR || c == CharUtil.LF) {
+                    // a comment line ends at a line break
+                    lineNo++;
+                    inComment = false;
+                }
+                // skip every char inside a comment line
+                buf.mark();
+                preChar = c;
+                continue;
+            }
+
+            if (inQuotes) {
+                // inside quotes: treated as content until the closing quote
+                if (c == config.textDelimiter) {
+                    // End of quoted text
+                    inQuotes = false;
+                } else {
+                    // new line inside field content
+                    if (isLineEnd(c, preChar)) {
+                        inQuotesLineCount++;
+                    }
+                }
+                // ordinary field char
+                copyLen++;
+            } else {
+                // outside quotes
+                if (c == config.fieldSeparator) {
+                    // end of a field
+                    if (copyLen > 0) {
+                        buf.appendTo(currentField, copyLen);
+                        copyLen = 0;
+                    }
+                    buf.mark();
+                    addField(currentFields, currentField.toStringAndReset());
+                } else if (c == config.textDelimiter && isFieldBegin(preChar)) {
+                    // a quote that opens at the beginning of a field
+                    inQuotes = true;
+                    copyLen++;
+                } else if (c == CharUtil.CR) {
+                    // \r: the row ends here
+                    if (copyLen > 0) {
+                        buf.appendTo(currentField, copyLen);
+                    }
+                    buf.mark();
+                    addField(currentFields, currentField.toStringAndReset());
+                    preChar = c;
+                    break;
+                } else if (c == CharUtil.LF) {
+                    // \n
+                    if (preChar != CharUtil.CR) {
+                        if (copyLen > 0) {
+                            buf.appendTo(currentField, copyLen);
+                        }
+                        buf.mark();
+                        addField(currentFields, currentField.toStringAndReset());
+                        preChar = c;
+                        break;
+                    }
+                    // previous char was \r, so this field was already handled; just skip
+                    buf.mark();
+                } else {
+                    // ordinary char
+                    copyLen++;
+                }
+            }
+
+            preChar = c;
+        }
+
+        // restore fields
+        this.preChar = preChar;
+
+        lineNo++;
+        return currentFields;
+    }
+
+    @Override
+    public void close() throws IOException {
+        reader.close();
+    }
+
+    /**
+     * Add a field to the field list, removing the wrapping delimiters and unescaping
+     *
+     * @param currentFields the current field list (i.e. the row)
+     * @param field         the field
+     */
+    private void addField(List<String> currentFields, String field) {
+        final char textDelimiter = this.config.textDelimiter;
+
+        // ignore line breaks that follow a redundant quote
+        field = StrUtil.trim(field, 1, (c-> c == CharUtil.LF || c == CharUtil.CR));
+        // strip tab indentation around hand-written CSV values; NOTE(review): the regex also hits tab+quote pairs inside a quoted value - confirm intended
+        field = field.replaceAll("\t+\"|\"\t+", "\"");
+
+        if(StrUtil.isWrap(field, textDelimiter)){
+            field = StrUtil.sub(field, 1, field.length() - 1);
+            // https://datatracker.ietf.org/doc/html/rfc4180#section-2
+            // rule 7: only delimiters inside a wrapped field need escaping
+            field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter));
+        }
+
+        if(this.config.trimField){
+            // issue#I49M0C@Gitee
+            field = StrUtil.trim(field);
+        }
+        currentFields.add(field);
+    }
+
+    /**
+     * Whether the char is a line terminator
+     *
+     * @param c       the char
+     * @param preChar the previous char
+     * @return true when c is CR or LF and the previous char is not CR
+     * @since 5.7.4
+     */
+    private boolean isLineEnd(char c, int preChar) {
+        return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
+    }
+
+    /**
+     * Decide from the previous char whether a field begins here; the cases are:
+     * <ul>
+     *     <li>start of text, no previous char</li>
+     *     <li>indentation (tab)</li>
+     *     <li>field separator, i.e. the previous field just ended</li>
+     *     <li>line break, i.e. a new line starts</li>
+     * </ul>
+     *
+     * @param preChar the previous char
+     * @return whether a field begins
+     */
+    private boolean isFieldBegin(final int preChar) {
+        return preChar == -1
+                || preChar == CoreConstants.TAB
+                || preChar == config.fieldSeparator
+                || preChar == CharUtil.LF
+                || preChar == CharUtil.CR;
+    }
+
+    /**
+     * Internal buffer
+     *
+     * @author looly
+     */
+    private static class Buffer implements Serializable{
+        private static final long serialVersionUID = 1L;
+
+        final char[] buf;
+
+        /**
+         * Mark position, used when reading data out
+         */
+        private int mark;
+        /**
+         * Current position
+         */
+        private int position;
+        /**
+         * Length of the data read, normally less than buf.length; -1 means no data
+         */
+        private int limit;
+
+        Buffer(int capacity) {
+            buf = new char[capacity];
+        }
+
+        /**
+         * Whether unread data remains
+         *
+         * @return whether unread data remains
+         */
+        public final boolean hasRemaining() {
+            return position < limit;
+        }
+
+        /**
+         * Read into the buffer<br>
+         * Full read: resets mark and position to 0 and sets limit to the value returned by the reader
+         *
+         * @param reader {@link Reader}
+         */
+        int read(Reader reader) {
+            int length;
+            try {
+                length = reader.read(this.buf);
+            } catch (IOException e) {
+                throw new IORuntimeException(e);
+            }
+            this.mark = 0;
+            this.position = 0;
+            this.limit = length;
+            return length;
+        }
+
+        /**
+         * Return the current char, then advance the current position by one<br>
+         * This method does not check for the end of the array; check with {@link #hasRemaining()} first.
+         *
+         * @return the char at the current position
+         * @see #hasRemaining()
+         */
+        char get() {
+            return this.buf[this.position++];
+        }
+
+        /**
+         * Record the current position as the start of the next read
+         */
+        void mark() {
+            this.mark = this.position;
+        }
+
+        /**
+         * Append data to a {@link StrBuilder}; {@link #mark()} must be called manually afterwards to reset the read position
+         *
+         * @param builder {@link StrBuilder}
+         * @param length  number of chars to append
+         * @see #mark()
+         */
+        void appendTo(StrBuilder builder, int length) {
+            builder.append(this.buf, this.mark, length);
+        }
+    }
+}
diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReadConfig.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReadConfig.java
new file mode 100644
index 0000000..e85c2e0
--- /dev/null
+++ b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReadConfig.java
@@ -0,0 +1,118 @@
+package cn.octopusyan.dmt.utils.csv;
+
+import java.io.Serializable;
+
+/**
+ * CSV read configuration
+ *
+ * @author looly
+ * NOTE(review): if CsvConfig is generic, a type argument may have been stripped from the extends clause - confirm
+ */
+public class CsvReadConfig extends CsvConfig implements Serializable {
+    private static final long serialVersionUID = 5396453565371560052L;
+
+    /** Header line number; -1 means no header line */
+    protected long headerLineNo = -1;
+    /** Whether to skip empty rows, default true */
+    protected boolean skipEmptyRows = true;
+    /** Whether to throw when rows have different field counts, default false */
+    protected boolean errorOnDifferentFieldCount;
+    /** First line to read (inclusive), as a line number of the original file */
+    protected long beginLineNo;
+    /** Last line to read (inclusive), as a line number of the original file */
+    protected long endLineNo = Long.MAX_VALUE-1;
+    /** Whether to trim whitespace on both sides of each field */
+    protected boolean trimField;
+
+    /**
+     * Default configuration
+     *
+     * @return the default configuration
+     */
+    public static CsvReadConfig defaultConfig() {
+        return new CsvReadConfig();
+    }
+
+    /**
+     * Set whether the first line is the header line, default false<br>
+     * When {@code true} the header line number becomes {@link #beginLineNo}; when {@code false} it becomes -1, meaning no header line
+     *
+     * @param containsHeader whether the first line is the header line, default false
+     * @return this
+     * @see #setHeaderLineNo(long)
+     */
+    public CsvReadConfig setContainsHeader(boolean containsHeader) {
+        return setHeaderLineNo(containsHeader ? beginLineNo : -1);
+    }
+
+    /**
+     * Set the header line number, default -1, meaning no header line<br>
+     *
+     * @param headerLineNo header line number, -1 means no header line
+     * @return this
+     * @since 5.7.23
+     */
+    public CsvReadConfig setHeaderLineNo(long headerLineNo) {
+        this.headerLineNo = headerLineNo;
+        return this;
+    }
+
+    /**
+     * Set whether to skip empty rows, default true
+     *
+     * @param skipEmptyRows whether to skip empty rows, default true
+     * @return this
+     */
+    public CsvReadConfig setSkipEmptyRows(boolean skipEmptyRows) {
+        this.skipEmptyRows = skipEmptyRows;
+        return this;
+    }
+
+    /**
+     * Set whether to throw when rows have different field counts, default false
+     *
+     * @param errorOnDifferentFieldCount whether to throw when rows have different field counts, default false
+     * @return this
+     */
+    public CsvReadConfig setErrorOnDifferentFieldCount(boolean errorOnDifferentFieldCount) {
+        this.errorOnDifferentFieldCount = errorOnDifferentFieldCount;
+        return this;
+    }
+
+    /**
+     * Set the first line to read (inclusive), default 0, as a line number of the original file
+     *
+     * @param beginLineNo first line number (inclusive)
+     * @return this
+     * @since 5.7.4
+     */
+    public CsvReadConfig setBeginLineNo(long beginLineNo) {
+        this.beginLineNo = beginLineNo;
+        return this;
+    }
+
+    /**
+     * Set the last line to read (inclusive), unlimited by default, as a line number of the original file
+     *
+     * @param endLineNo last line number (inclusive)
+     * @return this
+     * @since 5.7.4
+     */
+    public CsvReadConfig setEndLineNo(long endLineNo) {
+        this.endLineNo = endLineNo;
+        return this;
+    }
+
+    /**
+     * Set whether to trim whitespace on both sides of each field<br>
+     * If the field is wrapped in {@link #textDelimiter} the surrounding spaces are kept
+     * NOTE(review): CsvParser.addField trims after removing the delimiters, which seems to contradict the line above - confirm
+     * @param trimField whether to trim whitespace on both sides
+     * @return this
+     * @since 5.7.13
+     */
+    public CsvReadConfig setTrimField(boolean trimField) {
+        this.trimField = trimField;
+        return this;
+    }
+}
diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReader.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReader.java
new file mode 100644
index 0000000..235e86a
--- /dev/null
+++ b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReader.java
@@ -0,0 +1,153 @@
+package cn.octopusyan.dmt.utils.csv;
+
+import cn.hutool.core.io.FileUtil;
+import cn.hutool.core.io.IORuntimeException;
+import cn.hutool.core.io.IoUtil;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
+import java.util.Iterator;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+/**
+ * CSV file reader, modelled on FastCSV
+ *
+ * @author Looly
+ * @since 4.0.1
+ */
+public class CsvReader extends CsvBaseReader implements Iterable<CsvRow>, Closeable {
+    private static final long serialVersionUID = 1L;
+
+    private final Reader reader;
+
+    //--------------------------------------------------------------------------------------------- Constructor start
+
+    /**
+     * Constructor using the default configuration
+     */
+    public CsvReader() {
+        this(null);
+    }
+
+    /**
+     * Constructor
+     *
+     * @param config configuration
+     */
+    public CsvReader(CsvReadConfig config) {
+        this((Reader) null, config);
+    }
+
+    /**
+     * Constructor using the {@link #DEFAULT_CHARSET} encoding
+     *
+     * @param file   CSV file; documented upstream as "null means no path is set"
+     * @param config configuration, null means default configuration
+     * @since 5.0.4
+     */
+    public CsvReader(File file, CsvReadConfig config) {
+        this(file, DEFAULT_CHARSET, config);
+    }
+
+    /**
+     * Constructor using the {@link #DEFAULT_CHARSET} encoding
+     *
+     * @param path   CSV file path; documented upstream as "null means no path is set"
+     * @param config configuration, null means default configuration
+     * @since 5.0.4
+     */
+    public CsvReader(Path path, CsvReadConfig config) {
+        this(path, DEFAULT_CHARSET, config);
+    }
+
+    /**
+     * Constructor
+     *
+     * @param file    CSV file; NOTE(review): passed straight to FileUtil.getReader - confirm null is accepted
+     * @param charset encoding
+     * @param config  configuration, null means default configuration
+     * @since 5.0.4
+     */
+    public CsvReader(File file, Charset charset, CsvReadConfig config) {
+        this(FileUtil.getReader(file, charset), config);
+    }
+
+    /**
+     * Constructor
+     *
+     * @param path    CSV file path; NOTE(review): passed straight to FileUtil.getReader - confirm null is accepted
+     * @param charset encoding
+     * @param config  configuration, null means default configuration
+     * @since 5.0.4
+     */
+    public CsvReader(Path path, Charset charset, CsvReadConfig config) {
+        this(FileUtil.getReader(path, charset), config);
+    }
+
+    /**
+     * Constructor
+     *
+     * @param reader {@link Reader}, null means no default reader is set
+     * @param config configuration, null means default configuration
+     * @since 5.0.4
+     */
+    public CsvReader(Reader reader, CsvReadConfig config) {
+        super(config);
+        this.reader = reader;
+    }
+    //--------------------------------------------------------------------------------------------- Constructor end
+    /**
+     * Read the CSV file; this method can only be called once<br>
+     * Requires that a file path or a Reader was passed to the constructor
+     *
+     * @return {@link CsvData} containing the data list and row information
+     * @throws IORuntimeException on I/O error
+     */
+    public CsvData read() throws IORuntimeException {
+        return read(this.reader, false);
+    }
+
+    /**
+     * Read the CSV data; this method can only be called once<br>
+     * Requires that a file path or a Reader was passed to the constructor
+     *
+     * @param rowHandler row handler, used to process the data row by row
+     * @throws IORuntimeException on I/O error
+     * @since 5.0.4
+     */
+    public void read(CsvRowHandler rowHandler) throws IORuntimeException {
+        read(this.reader, false, rowHandler);
+    }
+
+    /**
+     * Create a {@link Stream} from the Reader so that CSV rows can be read in stream style
+     *
+     * @return {@link Stream}
+     * @since 5.7.14
+     */
+    public Stream<CsvRow> stream() {
+        return StreamSupport.stream(spliterator(), false)
+                .onClose(() -> {
+                    try {
+                        close();
+                    } catch (final IOException e) {
+                        throw new IORuntimeException(e);
+                    }
+                });
+    }
+
+    @Override
+    public Iterator<CsvRow> iterator() {
+        return parse(this.reader);
+    }
+
+    @Override
+    public void close() throws IOException {
+        IoUtil.close(this.reader);
+    }
+}
diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRow.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRow.java
new file mode 100644
index 0000000..453cf16
--- /dev/null
+++ b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRow.java
@@ -0,0 +1,262 @@
+package cn.octopusyan.dmt.utils.csv;
+
+import cn.hutool.core.bean.BeanUtil;
+import cn.hutool.core.lang.Assert;
+
+import java.util.*;
+
+/**
+ * Representation of one row of a CSV
+ *
+ * @author Looly
+ */
+public final class CsvRow implements List<String> {
+
+    /** Original line number */
+    private final long originalLineNumber;
+
+    final Map<String, Integer> headerMap;
+    final List<String> fields;
+
+    /**
+     * Constructor
+     *
+     * @param originalLineNumber the line of the file this row corresponds to
+     * @param headerMap          header map (header name to column index)
+     * @param fields             field values
+     */
+    public CsvRow(long originalLineNumber, Map<String, Integer> headerMap, List<String> fields) {
+        Assert.notNull(fields, "fields must be not null!");
+        this.originalLineNumber = originalLineNumber;
+        this.headerMap = headerMap;
+        this.fields = fields;
+    }
+
+    /**
+     * Get the original line number; for a multi-line row this is the first line's number. Comment lines are ignored
+     *
+     * @return the original line number
+     */
+    public long getOriginalLineNumber() {
+        return originalLineNumber;
+    }
+
+    /**
+     * Get the field value for a header name
+     *
+     * @param name header name
+     * @return the field value, null if there is no such field
+     * @throws IllegalArgumentException if the CSV has no header line (raised by Assert.notNull)
+     */
+    public String getByName(String name) {
+        Assert.notNull(this.headerMap, "No header available!");
+
+        final Integer col = headerMap.get(name);
+        if (col != null) {
+            return get(col);
+        }
+        return null;
+    }
+
+    /**
+     * Get the list of all field values of this row
+     *
+     * @return the field value list (the backing list itself, not a copy)
+     */
+    public List<String> getRawList() {
+        return fields;
+    }
+
+    /**
+     * Get a map from header name to field value
+     *
+     * @return map from header name to field value
+     * @throws IllegalStateException if the CSV has no header line
+     */
+    public Map<String, String> getFieldMap() {
+        if (headerMap == null) {
+            throw new IllegalStateException("No header available");
+        }
+
+        final Map<String, String> fieldMap = new LinkedHashMap<>(headerMap.size(), 1);
+        String key;
+        Integer col;
+        String val;
+        for (final Map.Entry<String, Integer> header : headerMap.entrySet()) {
+            key = header.getKey();
+            col = header.getValue();
+            val = null == col ? null : get(col);
+            fieldMap.put(key, val);
+        }
+
+        return fieldMap;
+    }
+
+    /**
+     * Convert this row to a bean
+     *
+     * @param <T>   bean type
+     * @param clazz bean class
+     * @return the bean
+     * @since 5.3.6
+     */
+    public <T> T toBean(Class<T> clazz){
+        return BeanUtil.toBeanIgnoreError(getFieldMap(), clazz);
+    }
+
+    /**
+     * Get the number of fields
+     *
+     * @return the number of fields
+     */
+    public int getFieldCount() {
+        return fields.size();
+    }
+
+    @Override
+    public int size() {
+        return this.fields.size();
+    }
+
+    @Override
+    public boolean isEmpty() {
+        return this.fields.isEmpty();
+    }
+
+    @Override
+    public boolean contains(Object o) {
+        return this.fields.contains(o);
+    }
+
+    @Override
+    public Iterator<String> iterator() {
+        return this.fields.iterator();
+    }
+
+    @Override
+    public Object[] toArray() {
+        return this.fields.toArray();
+    }
+
+    @Override
+    public <T> T[] toArray(T[] a) {
+        //noinspection SuspiciousToArrayCall
+        return this.fields.toArray(a);
+    }
+
+    @Override
+    public boolean add(String e) {
+        return this.fields.add(e);
+    }
+
+    @Override
+    public boolean remove(Object o) {
+        return this.fields.remove(o);
+    }
+
+    @Override
+    public boolean containsAll(Collection<?> c) {
+        return this.fields.containsAll(c);
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends String> c) {
+        return this.fields.addAll(c);
+    }
+
+    @Override
+    public boolean addAll(int index, Collection<? extends String> c) {
+        return this.fields.addAll(index, c);
+    }
+
+    @Override
+    public boolean removeAll(Collection<?> c) {
+        return this.fields.removeAll(c);
+    }
+
+    @Override
+    public boolean retainAll(Collection<?> c) {
+        return this.fields.retainAll(c);
+    }
+
+    @Override
+    public void clear() {
+        this.fields.clear();
+    }
+
+    @Override
+    public String get(int index) {
+        return index >= fields.size() ? null : fields.get(index);
+    }
+
+    @Override
+    public String set(int index, String element) {
+        return this.fields.set(index, element);
+    }
+
+    @Override
+    public void add(int index, String element) {
+        this.fields.add(index, element);
+    }
+
+    @Override
+    public String remove(int index) {
+        return this.fields.remove(index);
+    }
+
+    @Override
+    public int indexOf(Object o) {
+        return this.fields.indexOf(o);
+    }
+
+    @Override
+    public int lastIndexOf(Object o) {
+        return this.fields.lastIndexOf(o);
+    }
+
+    @Override
+    public ListIterator<String> listIterator() {
+        return this.fields.listIterator();
+    }
+
+    @Override
+    public ListIterator<String> listIterator(int index) {
+        return this.fields.listIterator(index);
+    }
+
+    @Override
+    public List<String> subList(int fromIndex, int toIndex) {
+        return this.fields.subList(fromIndex, toIndex);
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder("CsvRow{");
+        sb.append("originalLineNumber=");
+        sb.append(originalLineNumber);
+        sb.append(", ");
+
+        sb.append("fields=");
+        if (headerMap != null) {
+            sb.append('{');
+            for (final Iterator<Map.Entry<String, String>> it = getFieldMap().entrySet().iterator(); it.hasNext();) {
+
+                final Map.Entry<String, String> entry = it.next();
+                sb.append(entry.getKey());
+                sb.append('=');
+                if (entry.getValue() != null) {
+                    sb.append(entry.getValue());
+                }
+                if (it.hasNext()) {
+                    sb.append(", ");
+                }
+            }
+            sb.append('}');
+        } else {
+            sb.append(fields.toString());
+        }
+
+        sb.append('}');
+        return sb.toString();
+    }
+}
diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRowHandler.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRowHandler.java
new file mode 100644
index 0000000..623a22a
--- /dev/null
+++ b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRowHandler.java
@@ -0,0 +1,18 @@
+package cn.octopusyan.dmt.utils.csv;
+
+/**
+ * CSV row handler; implement this interface to process the data row by row
+ *
+ * @author Looly
+ * @since 5.0.4
+ */
+@FunctionalInterface
+public interface CsvRowHandler {
+
+    /**
+     * Handle one row of data
+     *
+     * @param row the row
+     */
+    void handle(CsvRow row);
+}
diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvUtil.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvUtil.java
new file mode 100644
index 0000000..9059426
--- /dev/null
+++ b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvUtil.java
@@ -0,0 +1,144 @@
+package cn.octopusyan.dmt.utils.csv;
+
+import cn.hutool.core.text.csv.CsvWriteConfig;
+import cn.hutool.core.text.csv.CsvWriter;
+
+import java.io.File;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.charset.Charset;
+
+/**
+ * CSV utilities
+ *
+ * @author looly
+ * @since 4.0.5
+ */
+public class CsvUtil {
+
+    //----------------------------------------------------------------------------------------------------------- Reader
+
+    /**
+     * Get a CSV reader; a reader created this way must be given the resource to read by the caller
+     *
+     * @param config configuration, may be null.
+     * @return {@link CsvReader}
+     */
+    public static CsvReader getReader(CsvReadConfig config) {
+        return new CsvReader(config);
+    }
+
+    /**
+     * Get a CSV reader; a reader created this way must be given the resource to read by the caller
+     *
+     * @return {@link CsvReader}
+     */
+    public static CsvReader getReader() {
+        return new CsvReader();
+    }
+
+    /**
+     * Get a CSV reader
+     *
+     * @param reader {@link Reader}
+     * @param config configuration, {@code null} means default configuration
+     * @return {@link CsvReader}
+     * @since 5.7.14
+     */
+    public static CsvReader getReader(Reader reader, CsvReadConfig config) {
+        return new CsvReader(reader, config);
+    }
+
+    /**
+     * Get a CSV reader
+     *
+     * @param reader {@link Reader}
+     * @return {@link CsvReader}
+     * @since 5.7.14
+     */
+    public static CsvReader getReader(Reader reader) {
+        return getReader(reader, null);
+    }
+
+    //----------------------------------------------------------------------------------------------------------- Writer
+
+    /**
+     * Get a CSV writer with the default configuration, overwriting an existing file (if any)
+     *
+     * @param filePath CSV file path
+     * @param charset  encoding
+     * @return {@link CsvWriter}
+     */
+    public static CsvWriter getWriter(String filePath, Charset charset) {
+        return new CsvWriter(filePath, charset);
+    }
+
+    /**
+     * Get a CSV writer with the default configuration, overwriting an existing file (if any)
+     *
+     * @param file    CSV file
+     * @param charset encoding
+     * @return {@link CsvWriter}
+     */
+    public static CsvWriter getWriter(File file, Charset charset) {
+        return new CsvWriter(file, charset);
+    }
+
+    /**
+     * Get a CSV writer with the default configuration
+     *
+     * @param filePath CSV file path
+     * @param charset  encoding
+     * @param isAppend whether to append
+     * @return {@link CsvWriter}
+     */
+    public static CsvWriter getWriter(String filePath, Charset charset, boolean isAppend) {
+        return new CsvWriter(filePath, charset, isAppend);
+    }
+
+    /**
+     * Get a CSV writer with the default configuration
+     *
+     * @param file     CSV file
+     * @param charset  encoding
+     * @param isAppend whether to append
+     * @return {@link CsvWriter}
+     */
+    public static CsvWriter getWriter(File file, Charset charset, boolean isAppend) {
+        return new CsvWriter(file, charset, isAppend);
+    }
+
+    /**
+     * Get a CSV writer
+     *
+     * @param file     CSV file
+     * @param charset  encoding
+     * @param isAppend whether to append
+     * @param config   write configuration, null means default configuration
+     * @return {@link CsvWriter}
+     */
+    public static CsvWriter getWriter(File file, Charset charset, boolean isAppend, CsvWriteConfig config) {
+        return new CsvWriter(file, charset, isAppend, config);
+    }
+
+    /**
+     * Get a CSV writer
+     *
+     * @param writer Writer
+     * @return {@link CsvWriter}
+     */
+    public static CsvWriter getWriter(Writer writer) {
+        return new CsvWriter(writer);
+    }
+
+    /**
+     * Get a CSV writer
+     *
+     * @param writer Writer
+     * @param config write configuration, null means default configuration
+     * @return {@link CsvWriter}
+     */
+    public static CsvWriter getWriter(Writer writer, CsvWriteConfig config) {
+        return new CsvWriter(writer, config);
+    }
+}
diff --git a/src/main/java/cn/octopusyan/dmt/viewModel/MainViewModel.java b/src/main/java/cn/octopusyan/dmt/viewModel/MainViewModel.java
index f306fbd..c2259d8 100644
--- a/src/main/java/cn/octopusyan/dmt/viewModel/MainViewModel.java
+++ b/src/main/java/cn/octopusyan/dmt/viewModel/MainViewModel.java
@@ -11,6 +11,7 @@ import cn.octopusyan.dmt.task.listener.DefaultTaskListener;
 import cn.octopusyan.dmt.translate.DelayWord;
 import cn.octopusyan.dmt.translate.TranslateUtil;
 import cn.octopusyan.dmt.view.ConsoleLog;
+import cn.octopusyan.dmt.view.alert.AlertUtil;
 import javafx.application.Platform;
 import javafx.beans.property.SimpleStringProperty;
 import javafx.beans.property.StringProperty;
@@ -149,6 +150,13 @@ public class MainViewModel extends BaseViewModel {
 
                 Platform.runLater(() -> controller.onPackOver(file));
             }
+            @Override
+            public void onFailed(Throwable throwable) {
+                super.onFailed(throwable);
+                Platform.runLater(() -> {
+                    AlertUtil.getInstance().exception(new RuntimeException(throwable)).show();
+                });
+            }
         });
         packTask.execute();
     }
diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java
index ece8532..5249939 100644
--- a/src/main/java/module-info.java
+++ b/src/main/java/module-info.java
@@ -18,6 +18,7 @@ module cn.octopusyan.dmt {
     requires org.kordamp.ikonli.javafx;
     requires org.kordamp.ikonli.feather;
     requires java.management;
+    requires cn.hutool.core;
 
     exports cn.octopusyan.dmt;
     exports cn.octopusyan.dmt.model to com.fasterxml.jackson.databind;