From 5a9226901fc41c80daafbd085ec4cf7938a329fa Mon Sep 17 00:00:00 2001 From: octopus_yan Date: Mon, 16 Dec 2024 12:45:45 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BD=BF=E7=94=A8hutool=20csv=E5=B7=A5?= =?UTF-8?q?=E5=85=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 2 +- .../java/cn/octopusyan/dmt/utils/PBOUtil.java | 2 +- .../dmt/utils/csv/CsvBaseReader.java | 307 ----------- .../octopusyan/dmt/utils/csv/CsvConfig.java | 120 ----- .../cn/octopusyan/dmt/utils/csv/CsvData.java | 83 --- .../octopusyan/dmt/utils/csv/CsvParser.java | 489 ------------------ .../dmt/utils/csv/CsvReadConfig.java | 129 ----- .../octopusyan/dmt/utils/csv/CsvReader.java | 154 ------ .../cn/octopusyan/dmt/utils/csv/CsvRow.java | 264 ---------- .../dmt/utils/csv/CsvRowHandler.java | 18 - .../cn/octopusyan/dmt/utils/csv/CsvUtil.java | 144 ------ 11 files changed, 2 insertions(+), 1710 deletions(-) delete mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvBaseReader.java delete mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvConfig.java delete mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvData.java delete mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvParser.java delete mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvReadConfig.java delete mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvReader.java delete mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvRow.java delete mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvRowHandler.java delete mode 100644 src/main/java/cn/octopusyan/dmt/utils/csv/CsvUtil.java diff --git a/pom.xml b/pom.xml index 309edcb..b878ecb 100644 --- a/pom.xml +++ b/pom.xml @@ -135,7 +135,7 @@ cn.hutool hutool-core - 5.8.33 + 5.8.34 diff --git a/src/main/java/cn/octopusyan/dmt/utils/PBOUtil.java b/src/main/java/cn/octopusyan/dmt/utils/PBOUtil.java index 12c50d0..ff9a2fb 100644 --- a/src/main/java/cn/octopusyan/dmt/utils/PBOUtil.java +++ b/src/main/java/cn/octopusyan/dmt/utils/PBOUtil.java @@ -1,11 +1,11 @@ package cn.octopusyan.dmt.utils; +import cn.hutool.core.text.csv.*; import cn.octopusyan.dmt.common.config.Constants; import cn.octopusyan.dmt.common.config.Context; import cn.octopusyan.dmt.common.util.ProcessesUtil; import cn.octopusyan.dmt.model.WordCsvItem; import cn.octopusyan.dmt.model.WordItem; -import cn.octopusyan.dmt.utils.csv.*; import cn.octopusyan.dmt.view.ConsoleLog; import org.apache.commons.io.FileUtils; import org.apache.commons.io.LineIterator; diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvBaseReader.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvBaseReader.java deleted file mode 100644 index e95bd4f..0000000 --- a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvBaseReader.java +++ /dev/null @@ -1,307 +0,0 @@ -package cn.octopusyan.dmt.utils.csv; - -import cn.hutool.core.io.FileUtil; -import cn.hutool.core.io.IORuntimeException; -import cn.hutool.core.io.IoUtil; -import cn.hutool.core.lang.Assert; -import cn.hutool.core.util.CharsetUtil; -import cn.hutool.core.util.ObjectUtil; - -import java.io.File; -import java.io.Reader; -import java.io.Serializable; -import java.io.StringReader; -import java.nio.charset.Charset; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -/** - * CSV文件读取器基础类,提供灵活的文件、路径中的CSV读取,一次构造可多次调用读取不同数据,参考:FastCSV - * - * @author Looly - * @since 5.0.4 - */ -public class CsvBaseReader implements Serializable { - private static final long serialVersionUID = 1L; - - /** - * 默认编码 - */ - protected static final Charset DEFAULT_CHARSET = CharsetUtil.CHARSET_UTF_8; - - private final CsvReadConfig config; - - //--------------------------------------------------------------------------------------------- Constructor start - - /** - * 构造,使用默认配置项 - */ - public CsvBaseReader() { - this(null); - } - - /** - * 构造 - * - * @param config 配置项 - */ - public CsvBaseReader(CsvReadConfig config) { - this.config = ObjectUtil.defaultIfNull(config, CsvReadConfig::defaultConfig); - } - //--------------------------------------------------------------------------------------------- Constructor end - - /** - * 设置字段分隔符,默认逗号',' - * - * @param fieldSeparator 字段分隔符,默认逗号',' - */ - public void setFieldSeparator(char fieldSeparator) { - this.config.setFieldSeparator(fieldSeparator); - } - - /** - * 设置 文本分隔符,文本包装符,默认双引号'"' - * - * @param textDelimiter 文本分隔符,文本包装符,默认双引号'"' - */ - public void setTextDelimiter(char textDelimiter) { - this.config.setTextDelimiter(textDelimiter); - } - - /** - * 设置是否首行做为标题行,默认false - * - * @param containsHeader 是否首行做为标题行,默认false - */ - public void setContainsHeader(boolean containsHeader) { - this.config.setContainsHeader(containsHeader); - } - - /** - * 设置是否跳过空白行,默认true - * - * @param skipEmptyRows 是否跳过空白行,默认true - */ - public void setSkipEmptyRows(boolean skipEmptyRows) { - this.config.setSkipEmptyRows(skipEmptyRows); - } - - /** - * 设置每行字段个数不同时是否抛出异常,默认false - * - * @param errorOnDifferentFieldCount 每行字段个数不同时是否抛出异常,默认false - */ - public void setErrorOnDifferentFieldCount(boolean errorOnDifferentFieldCount) { - this.config.setErrorOnDifferentFieldCount(errorOnDifferentFieldCount); - } - - /** - * 读取CSV文件,默认UTF-8编码 - * - * @param file CSV文件 - * @return {@link CsvData},包含数据列表和行信息 - * @throws IORuntimeException IO异常 - */ - public CsvData read(File file) throws IORuntimeException { - return read(file, DEFAULT_CHARSET); - } - - /** - * 从字符串中读取CSV数据 - * - * @param csvStr CSV字符串 - * @return {@link CsvData},包含数据列表和行信息 - */ - public CsvData readFromStr(String csvStr) { - return read(new StringReader(csvStr)); - } - - /** - * 从字符串中读取CSV数据 - * - * @param csvStr CSV字符串 - * @param rowHandler 行处理器,用于一行一行的处理数据 - */ - public void readFromStr(String csvStr, CsvRowHandler rowHandler) { - read(parse(new StringReader(csvStr)), true, rowHandler); - } - - - /** - * 读取CSV文件 - * - * @param file CSV文件 - * @param charset 文件编码,默认系统编码 - * @return {@link CsvData},包含数据列表和行信息 - * @throws IORuntimeException IO异常 - */ - public CsvData read(File file, Charset charset) throws IORuntimeException { - return read(Objects.requireNonNull(file.toPath(), "file must not be null"), charset); - } - - /** - * 读取CSV文件,默认UTF-8编码 - * - * @param path CSV文件 - * @return {@link CsvData},包含数据列表和行信息 - * @throws IORuntimeException IO异常 - */ - public CsvData read(Path path) throws IORuntimeException { - return read(path, DEFAULT_CHARSET); - } - - /** - * 读取CSV文件 - * - * @param path CSV文件 - * @param charset 文件编码,默认系统编码 - * @return {@link CsvData},包含数据列表和行信息 - * @throws IORuntimeException IO异常 - */ - public CsvData read(Path path, Charset charset) throws IORuntimeException { - Assert.notNull(path, "path must not be null"); - return read(FileUtil.getReader(path, charset)); - } - - /** - * 从Reader中读取CSV数据,读取后关闭Reader - * - * @param reader Reader - * @return {@link CsvData},包含数据列表和行信息 - * @throws IORuntimeException IO异常 - */ - public CsvData read(Reader reader) throws IORuntimeException { - return read(reader, true); - } - - /** - * 从Reader中读取CSV数据 - * - * @param reader Reader - * @param close 读取结束是否关闭Reader - * @return {@link CsvData},包含数据列表和行信息 - * @throws IORuntimeException IO异常 - */ - public CsvData read(Reader reader, boolean close) throws IORuntimeException { - final CsvParser csvParser = parse(reader); - final List rows = new ArrayList<>(); - read(csvParser, close, rows::add); - final List header = config.headerLineNo > -1 ? csvParser.getHeader() : null; - - return new CsvData(header, rows); - } - - /** - * 从Reader中读取CSV数据,结果为Map,读取后关闭Reader。
- * 此方法默认识别首行为标题行。 - * - * @param reader Reader - * @return {@link CsvData},包含数据列表和行信息 - * @throws IORuntimeException IO异常 - */ - public List> readMapList(Reader reader) throws IORuntimeException { - // 此方法必须包含标题 - this.config.setContainsHeader(true); - - final List> result = new ArrayList<>(); - read(reader, (row) -> result.add(row.getFieldMap())); - return result; - } - - /** - * 从Reader中读取CSV数据并转换为Bean列表,读取后关闭Reader。
- * 此方法默认识别首行为标题行。 - * - * @param Bean类型 - * @param reader Reader - * @param clazz Bean类型 - * @return Bean列表 - */ - public List read(Reader reader, Class clazz) { - // 此方法必须包含标题 - this.config.setContainsHeader(true); - - final List result = new ArrayList<>(); - read(reader, (row) -> result.add(row.toBean(clazz))); - return result; - } - - /** - * 从字符串中读取CSV数据并转换为Bean列表,读取后关闭Reader。
- * 此方法默认识别首行为标题行。 - * - * @param Bean类型 - * @param csvStr csv字符串 - * @param clazz Bean类型 - * @return Bean列表 - */ - public List read(String csvStr, Class clazz) { - // 此方法必须包含标题 - this.config.setContainsHeader(true); - - final List result = new ArrayList<>(); - read(new StringReader(csvStr), (row) -> result.add(row.toBean(clazz))); - return result; - } - - /** - * 从Reader中读取CSV数据,读取后关闭Reader - * - * @param reader Reader - * @param rowHandler 行处理器,用于一行一行的处理数据 - * @throws IORuntimeException IO异常 - */ - public void read(Reader reader, CsvRowHandler rowHandler) throws IORuntimeException { - read(reader, true, rowHandler); - } - - /** - * 从Reader中读取CSV数据,读取后关闭Reader - * - * @param reader Reader - * @param close 读取结束是否关闭Reader - * @param rowHandler 行处理器,用于一行一行的处理数据 - * @throws IORuntimeException IO异常 - */ - public void read(Reader reader, boolean close, CsvRowHandler rowHandler) throws IORuntimeException { - read(parse(reader), close, rowHandler); - } - - //--------------------------------------------------------------------------------------------- Private method start - - /** - * 读取CSV数据,读取后关闭Parser - * - * @param csvParser CSV解析器 - * @param close 读取结束是否关闭{@link CsvParser} - * @param rowHandler 行处理器,用于一行一行的处理数据 - * @throws IORuntimeException IO异常 - * @since 5.0.4 - */ - private void read(CsvParser csvParser, boolean close, CsvRowHandler rowHandler) throws IORuntimeException { - try { - while (csvParser.hasNext()) { - rowHandler.handle(csvParser.next()); - } - } finally { - if (close) { - IoUtil.close(csvParser); - } - } - } - - /** - * 构建 {@link CsvParser} - * - * @param reader Reader - * @return CsvParser - * @throws IORuntimeException IO异常 - */ - protected CsvParser parse(Reader reader) throws IORuntimeException { - return new CsvParser(reader, this.config); - } - //--------------------------------------------------------------------------------------------- Private method start -} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvConfig.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvConfig.java deleted file mode 100644 index b4c0d53..0000000 --- a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvConfig.java +++ /dev/null @@ -1,120 +0,0 @@ -package cn.octopusyan.dmt.utils.csv; - -import cn.hutool.core.text.csv.CsvWriter; -import cn.hutool.core.util.CharUtil; - -import java.io.Serializable; -import java.util.LinkedHashMap; -import java.util.Map; - -/** - * CSV基础配置项,此配置项可用于读取和写出CSV,定义了包括字段分隔符、文本包装符等符号 - * - * @param 继承子类类型,用于this返回 - * @author looly - * @since 4.0.5 - */ -@SuppressWarnings("unchecked") -public class CsvConfig> implements Serializable { - private static final long serialVersionUID = -8069578249066158459L; - - /** - * 字段分隔符,默认逗号',' - */ - protected char fieldSeparator = CharUtil.COMMA; - /** - * 文本包装符,默认双引号'"' - */ - protected char textDelimiter = CharUtil.DOUBLE_QUOTES; - /** - * 注释符号,用于区分注释行,默认'#' - */ - protected Character commentCharacter = '#'; - /** - * 标题别名 - */ - protected Map headerAlias = new LinkedHashMap<>(); - - /** - * 设置字段分隔符,默认逗号',' - * - * @param fieldSeparator 字段分隔符,默认逗号',' - * @return this - */ - public T setFieldSeparator(final char fieldSeparator) { - this.fieldSeparator = fieldSeparator; - return (T) this; - } - - /** - * 设置 文本分隔符,文本包装符,默认双引号'"' - * - * @param textDelimiter 文本分隔符,文本包装符,默认双引号'"' - * @return this - */ - public T setTextDelimiter(char textDelimiter) { - this.textDelimiter = textDelimiter; - return (T) this; - } - - /** - * 设置注释无效
- * 当写出CSV时,{@link CsvWriter#writeComment(String)}将抛出异常
- * 当读取CSV时,注释行按照正常行读取 - * - * @return this - * @since 5.7.14 - */ - public T disableComment() { - return setCommentCharacter(null); - } - - /** - * 设置 注释符号,用于区分注释行,{@code null}表示忽略注释 - * - * @param commentCharacter 注释符号,用于区分注释行 - * @return this - * @since 5.5.7 - */ - public T setCommentCharacter(Character commentCharacter) { - this.commentCharacter = commentCharacter; - return (T) this; - } - - /** - * 设置标题行的别名Map - * - * @param headerAlias 别名Map - * @return this - * @since 5.7.10 - */ - public T setHeaderAlias(Map headerAlias) { - this.headerAlias = headerAlias; - return (T) this; - } - - /** - * 增加标题别名 - * - * @param header 标题 - * @param alias 别名 - * @return this - * @since 5.7.10 - */ - public T addHeaderAlias(String header, String alias) { - this.headerAlias.put(header, alias); - return (T) this; - } - - /** - * 去除标题别名 - * - * @param header 标题 - * @return this - * @since 5.7.10 - */ - public T removeHeaderAlias(String header) { - this.headerAlias.remove(header); - return (T) this; - } -} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvData.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvData.java deleted file mode 100644 index 5c39018..0000000 --- a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvData.java +++ /dev/null @@ -1,83 +0,0 @@ -package cn.octopusyan.dmt.utils.csv; - -import java.io.Serializable; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -/** - * CSV数据,包括头部信息和行数据,参考:FastCSV - * - * @author Looly - */ -public class CsvData implements Iterable, Serializable { - private static final long serialVersionUID = 1L; - - private final List header; - private final List rows; - - /** - * 构造 - * - * @param header 头信息, 可以为null - * @param rows 行 - */ - public CsvData(final List header, final List rows) { - this.header = header; - this.rows = rows; - } - - /** - * 总行数 - * - * @return 总行数 - */ - public int getRowCount() { - return this.rows.size(); - } - - /** - * 获取头信息列表,如果无头信息为{@code Null},返回列表为只读列表 - * - * @return the header row - might be {@code null} if no header exists - */ - public List getHeader() { - if (null == this.header) { - return null; - } - return Collections.unmodifiableList(this.header); - } - - /** - * 获取指定行,从0开始 - * - * @param index 行号 - * @return 行数据 - * @throws IndexOutOfBoundsException if index is out of range - */ - public CsvRow getRow(final int index) { - return this.rows.get(index); - } - - /** - * 获取所有行 - * - * @return 所有行 - */ - public List getRows() { - return this.rows; - } - - @Override - public Iterator iterator() { - return this.rows.iterator(); - } - - @Override - public String toString() { - return "CsvData{" + - "header=" + header + - ", rows=" + rows + - '}'; - } -} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvParser.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvParser.java deleted file mode 100644 index 808e9cc..0000000 --- a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvParser.java +++ /dev/null @@ -1,489 +0,0 @@ -package cn.octopusyan.dmt.utils.csv; - -import ch.qos.logback.core.CoreConstants; -import cn.hutool.core.collection.ComputeIter; -import cn.hutool.core.io.IORuntimeException; -import cn.hutool.core.io.IoUtil; -import cn.hutool.core.map.MapUtil; -import cn.hutool.core.text.StrBuilder; -import cn.hutool.core.util.CharUtil; -import cn.hutool.core.util.ObjectUtil; -import cn.hutool.core.util.StrUtil; - -import java.io.Closeable; -import java.io.IOException; -import java.io.Reader; -import java.io.Serializable; -import java.util.*; - -/** - * CSV行解析器,参考:FastCSV - * - * @author Looly - */ -public final class CsvParser extends ComputeIter implements Closeable, Serializable { - private static final long serialVersionUID = 1L; - - private static final int DEFAULT_ROW_CAPACITY = 10; - - private final Reader reader; - private final CsvReadConfig config; - - private final Buffer buf = new Buffer(IoUtil.DEFAULT_LARGE_BUFFER_SIZE); - /** - * 前一个特殊分界字符 - */ - private int preChar = -1; - /** - * 是否在引号包装内 - */ - private boolean inQuotes; - /** - * 连续双引号计数 - */ - private int continuousCount = 0; - /** - * 当前读取字段 - */ - private final StrBuilder currentField = new StrBuilder(512); - - /** - * 标题行 - */ - private CsvRow header; - /** - * 当前行号 - */ - private long lineNo = -1; - /** - * 引号内的行数 - */ - private long inQuotesLineCount; - /** - * 第一行字段数,用于检查每行字段数是否一致 - */ - private int firstLineFieldCount = -1; - /** - * 最大字段数量,用于初始化行,减少扩容 - */ - private int maxFieldCount; - /** - * 是否读取结束 - */ - private boolean finished; - - /** - * CSV解析器 - * - * @param reader Reader - * @param config 配置,null则为默认配置 - */ - public CsvParser(final Reader reader, CsvReadConfig config) { - this.reader = Objects.requireNonNull(reader, "reader must not be null"); - this.config = ObjectUtil.defaultIfNull(config, CsvReadConfig::defaultConfig); - } - - /** - * 获取头部字段列表,如果headerLineNo < 0,抛出异常 - * - * @return 头部列表 - * @throws IllegalStateException 如果不解析头部或者没有调用nextRow()方法 - */ - public List getHeader() { - if (config.headerLineNo < 0) { - throw new IllegalStateException("No header available - header parsing is disabled"); - } - if (lineNo < config.beginLineNo) { - throw new IllegalStateException("No header available - call nextRow() first"); - } - return header.getRawList(); - } - - @Override - protected CsvRow computeNext() { - return nextRow(); - } - - /** - * 读取下一行数据 - * - * @return CsvRow - * @throws IORuntimeException IO读取异常 - */ - public CsvRow nextRow() throws IORuntimeException { - List currentFields; - int fieldCount; - while (false == finished) { - currentFields = readLine(); - fieldCount = currentFields.size(); - if (fieldCount < 1) { - // 空List表示读取结束 - break; - } - - // 读取范围校验 - if (lineNo < config.beginLineNo) { - // 未达到读取起始行,继续 - continue; - } - if (lineNo > config.endLineNo) { - // 超出结束行,读取结束 - break; - } - - // 跳过空行 - if (config.skipEmptyRows && fieldCount == 1 && currentFields.get(0).isEmpty()) { - // [""]表示空行 - continue; - } - - // 检查每行的字段数是否一致 - if (config.errorOnDifferentFieldCount) { - if (firstLineFieldCount < 0) { - firstLineFieldCount = fieldCount; - } else if (fieldCount != firstLineFieldCount) { - throw new IORuntimeException(String.format("Line %d has %d fields, but first line has %d fields", lineNo, fieldCount, firstLineFieldCount)); - } - } - - // 记录最大字段数 - if (fieldCount > maxFieldCount) { - maxFieldCount = fieldCount; - } - - //初始化标题 - if (lineNo == config.headerLineNo && null == header) { - initHeader(currentFields); - // 作为标题行后,此行跳过,下一行做为第一行 - continue; - } - - return new CsvRow(lineNo, null == header ? null : header.headerMap, currentFields); - } - - return null; - } - - /** - * 当前行做为标题行 - * - * @param currentFields 当前行字段列表 - */ - private void initHeader(final List currentFields) { - final Map localHeaderMap = new LinkedHashMap<>(currentFields.size()); - for (int i = 0; i < currentFields.size(); i++) { - String field = currentFields.get(i); - if (MapUtil.isNotEmpty(this.config.headerAlias)) { - // 自定义别名 - field = ObjectUtil.defaultIfNull(this.config.headerAlias.get(field), field); - } - if (StrUtil.isNotEmpty(field) && false == localHeaderMap.containsKey(field)) { - localHeaderMap.put(field, i); - } - } - - header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields)); - } - - /** - * 读取一行数据,如果读取结束,返回size为0的List
- * 空行是size为1的List,唯一元素是"" - * - *

- * 行号要考虑注释行和引号包装的内容中的换行 - *

- * - * @return 一行数据 - * @throws IORuntimeException IO异常 - */ - private List readLine() throws IORuntimeException { - // 矫正行号 - // 当一行内容包含多行数据时,记录首行行号,但是读取下一行时,需要把多行内容的行数加上 - if (inQuotesLineCount > 0) { - this.lineNo += this.inQuotesLineCount; - this.inQuotesLineCount = 0; - } - - final List currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY); - - final StrBuilder currentField = this.currentField; - final Buffer buf = this.buf; - int preChar = this.preChar;//前一个特殊分界字符 - int copyLen = 0; //拷贝长度 - boolean inComment = false; - - while (true) { - if (false == buf.hasRemaining()) { - // 此Buffer读取结束,开始读取下一段 - if (copyLen > 0) { - buf.appendTo(currentField, copyLen); - // 此处无需mark,read方法会重置mark - } - if (buf.read(this.reader) < 0) { - // CSV读取结束 - finished = true; - - if (currentField.hasContent() || preChar == config.fieldSeparator) { - //剩余部分作为一个字段 - addField(currentFields, currentField.toStringAndReset()); - } - break; - } - - //重置 - copyLen = 0; - } - - final char c = buf.get(); - - // 注释行标记 - if (preChar < 0 || preChar == CharUtil.CR || preChar == CharUtil.LF) { - // 判断行首字符为指定注释字符的注释开始,直到遇到换行符 - // 行首分两种,1是preChar < 0表示文本开始,2是换行符后紧跟就是下一行的开始 - // issue#IA8WE0 如果注释符出现在包装符内,被认为是普通字符 - if ((false == inQuotes) && null != this.config.commentCharacter && c == this.config.commentCharacter) { - inComment = true; - } - } - // 注释行处理 - if (inComment) { - if (c == CharUtil.CR || c == CharUtil.LF) { - // 注释行以换行符为结尾 - lineNo++; - inComment = false; - } - // 跳过注释行中的任何字符 - buf.mark(); - preChar = c; - continue; - } - - if (inQuotes) { - //引号内,作为内容,直到引号结束 - if (c == config.textDelimiter) { - if (buf.canRead(1) && buf.read(1) == CharUtil.DOUBLE_QUOTES) { - continuousCount++; - } else if (continuousCount != 0 && (continuousCount + 1) % 2 == 0) { - continuousCount = 0; - } else { - inQuotes = false; - } - } else { - if (continuousCount != 0) continuousCount = 0; - // 字段内容中新行 - if (isLineEnd(c, preChar)) { - inQuotesLineCount++; - } - } - // 普通字段字符 - copyLen++; - } else { - // 非引号内 - if (c == config.fieldSeparator) { - //一个字段结束 - if (copyLen > 0) { - buf.appendTo(currentField, copyLen); - copyLen = 0; - } - buf.mark(); - addField(currentFields, currentField.toStringAndReset()); - } else if (c == config.textDelimiter && isFieldBegin(preChar)) { - // 引号开始且出现在字段开头 - inQuotes = true; - copyLen++; - } else if (c == CharUtil.CR) { - // \r,直接结束 - if (copyLen > 0) { - buf.appendTo(currentField, copyLen); - } - buf.mark(); - addField(currentFields, currentField.toStringAndReset()); - preChar = c; - break; - } else if (c == CharUtil.LF) { - // \n - if (preChar != CharUtil.CR) { - if (copyLen > 0) { - buf.appendTo(currentField, copyLen); - } - buf.mark(); - addField(currentFields, currentField.toStringAndReset()); - preChar = c; - break; - } - // 前一个字符是\r,已经处理过这个字段了,此处直接跳过 - buf.mark(); - } else { - // 普通字符 - copyLen++; - } - } - - preChar = c; - } - - // restore fields - this.preChar = preChar; - - lineNo++; - return currentFields; - } - - @Override - public void close() throws IOException { - reader.close(); - } - - /** - * 将字段加入字段列表并自动去包装和去转义 - * - * @param currentFields 当前的字段列表(即为行) - * @param field 字段 - */ - private void addField(List currentFields, String field) { - final char textDelimiter = this.config.textDelimiter; - - // 忽略多余引号后的换行符 - field = StrUtil.trim(field, 1, (c -> c == CharUtil.LF || c == CharUtil.CR)); - // 去除手写csv列值前后的缩进符 - field = field.replaceAll("\t+\"|\"\t+", "\""); - - if (StrUtil.isWrap(field, textDelimiter)) { - field = StrUtil.sub(field, 1, field.length() - 1); - // https://datatracker.ietf.org/doc/html/rfc4180#section-2 - // 第七条规则,只有包装内的包装符需要转义 - field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter)); - } - - if (this.config.trimField) { - // issue#I49M0C@Gitee - field = StrUtil.trim(field); - } - currentFields.add(field); - } - - /** - * 是否行结束符 - * - * @param c 符号 - * @param preChar 前一个字符 - * @return 是否结束 - * @since 5.7.4 - */ - private boolean isLineEnd(char c, int preChar) { - return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR; - } - - /** - * 通过前一个字符,判断是否字段开始,几种情况: - *
    - *
  • 正文开头,无前字符
  • - *
  • 缩进
  • - *
  • 字段分隔符,即上个字段结束
  • - *
  • 换行符,即新行开始
  • - *
- * - * @param preChar 前字符 - * @return 是否字段开始 - */ - private boolean isFieldBegin(final int preChar) { - return preChar == -1 - || preChar == CoreConstants.TAB - || preChar == config.fieldSeparator - || preChar == CharUtil.LF - || preChar == CharUtil.CR; - } - - /** - * 内部Buffer - * - * @author looly - */ - private static class Buffer implements Serializable { - private static final long serialVersionUID = 1L; - - final char[] buf; - - /** - * 标记位置,用于读数据 - */ - private int mark; - /** - * 当前位置 - */ - private int position; - /** - * 读取的数据长度,一般小于buf.length,-1表示无数据 - */ - private int limit; - - Buffer(int capacity) { - buf = new char[capacity]; - } - - /** - * 是否还有未读数据 - * - * @return 是否还有未读数据 - */ - public final boolean hasRemaining() { - return position < limit; - } - - /** - * 读取到缓存
- * 全量读取,会重置Buffer中所有数据 - * - * @param reader {@link Reader} - */ - int read(Reader reader) { - int length; - try { - length = reader.read(this.buf); - } catch (IOException e) { - throw new IORuntimeException(e); - } - this.mark = 0; - this.position = 0; - this.limit = length; - return length; - } - - /** - * 先获取当前字符,再将当前位置后移一位
- * 此方法不检查是否到了数组末尾,请自行使用{@link #hasRemaining()}判断。 - * - * @return 当前位置字符 - * @see #hasRemaining() - */ - char get() { - return this.buf[this.position++]; - } - - boolean canRead(int position) { - return (this.position + position - 1) < limit; - } - - char read(int position) { - return this.buf[this.position + position - 1]; - } - - /** - * 标记位置记为下次读取位置 - */ - void mark() { - this.mark = this.position; - } - - /** - * 将数据追加到{@link StrBuilder},追加结束后需手动调用{@link #mark()} 重置读取位置 - * - * @param builder {@link StrBuilder} - * @param length 追加的长度 - * @see #mark() - */ - void appendTo(StrBuilder builder, int length) { - builder.append(this.buf, this.mark, length); - } - } -} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReadConfig.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReadConfig.java deleted file mode 100644 index f43c937..0000000 --- a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReadConfig.java +++ /dev/null @@ -1,129 +0,0 @@ -package cn.octopusyan.dmt.utils.csv; - -import java.io.Serializable; - -/** - * CSV读取配置项 - * - * @author looly - */ -public class CsvReadConfig extends CsvConfig implements Serializable { - private static final long serialVersionUID = 5396453565371560052L; - - /** - * 指定标题行号,-1表示无标题行 - */ - protected long headerLineNo = -1; - /** - * 是否跳过空白行,默认true - */ - protected boolean skipEmptyRows = true; - /** - * 每行字段个数不同时是否抛出异常,默认false - */ - protected boolean errorOnDifferentFieldCount; - /** - * 定义开始的行(包括),此处为原始文件行号 - */ - protected long beginLineNo; - /** - * 结束的行(包括),此处为原始文件行号 - */ - protected long endLineNo = Long.MAX_VALUE - 1; - /** - * 每个字段是否去除两边空白符 - */ - protected boolean trimField; - - /** - * 默认配置 - * - * @return 默认配置 - */ - public static CsvReadConfig defaultConfig() { - return new CsvReadConfig(); - } - - /** - * 设置是否首行做为标题行,默认false
- * 当设置为{@code true}时,默认标题行号是{@link #beginLineNo},{@code false}为-1,表示无行号 - * - * @param containsHeader 是否首行做为标题行,默认false - * @return this - * @see #setHeaderLineNo(long) - */ - public CsvReadConfig setContainsHeader(boolean containsHeader) { - return setHeaderLineNo(containsHeader ? beginLineNo : -1); - } - - /** - * 设置标题行行号,默认-1,表示无标题行
- * - * @param headerLineNo 标题行行号,-1表示无标题行 - * @return this - * @since 5.7.23 - */ - public CsvReadConfig setHeaderLineNo(long headerLineNo) { - this.headerLineNo = headerLineNo; - return this; - } - - /** - * 设置是否跳过空白行,默认true - * - * @param skipEmptyRows 是否跳过空白行,默认true - * @return this - */ - public CsvReadConfig setSkipEmptyRows(boolean skipEmptyRows) { - this.skipEmptyRows = skipEmptyRows; - return this; - } - - /** - * 设置每行字段个数不同时是否抛出异常,默认false - * - * @param errorOnDifferentFieldCount 每行字段个数不同时是否抛出异常,默认false - * @return this - */ - public CsvReadConfig setErrorOnDifferentFieldCount(boolean errorOnDifferentFieldCount) { - this.errorOnDifferentFieldCount = errorOnDifferentFieldCount; - return this; - } - - /** - * 设置开始的行(包括),默认0,此处为原始文件行号 - * - * @param beginLineNo 开始的行号(包括) - * @return this - * @since 5.7.4 - */ - public CsvReadConfig setBeginLineNo(long beginLineNo) { - this.beginLineNo = beginLineNo; - return this; - } - - /** - * 设置结束的行(包括),默认不限制,此处为原始文件行号 - * - * @param endLineNo 结束的行号(包括) - * @return this - * @since 5.7.4 - */ - public CsvReadConfig setEndLineNo(long endLineNo) { - this.endLineNo = endLineNo; - return this; - } - - /** - * 设置每个字段是否去除两边空白符
- * 如果字段以{@link #textDelimiter}包围,则保留两边空格 - * - * @param trimField 去除两边空白符 - * @return this - * @since 5.7.13 - */ - public CsvReadConfig setTrimField(boolean trimField) { - this.trimField = trimField; - return this; - } -} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReader.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReader.java deleted file mode 100644 index fecfe64..0000000 --- a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvReader.java +++ /dev/null @@ -1,154 +0,0 @@ -package cn.octopusyan.dmt.utils.csv; - -import cn.hutool.core.io.FileUtil; -import cn.hutool.core.io.IORuntimeException; -import cn.hutool.core.io.IoUtil; - -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.io.Reader; -import java.nio.charset.Charset; -import java.nio.file.Path; -import java.util.Iterator; -import java.util.stream.Stream; -import java.util.stream.StreamSupport; - -/** - * CSV文件读取器,参考:FastCSV - * - * @author Looly - * @since 4.0.1 - */ -public class CsvReader extends CsvBaseReader implements Iterable, Closeable { - private static final long serialVersionUID = 1L; - - private final Reader reader; - - //--------------------------------------------------------------------------------------------- Constructor start - - /** - * 构造,使用默认配置项 - */ - public CsvReader() { - this(null); - } - - /** - * 构造 - * - * @param config 配置项 - */ - public CsvReader(CsvReadConfig config) { - this((Reader) null, config); - } - - /** - * 构造,默认{@link #DEFAULT_CHARSET}编码 - * - * @param file CSV文件路径,null表示不设置路径 - * @param config 配置项,null表示默认配置 - * @since 5.0.4 - */ - public CsvReader(File file, CsvReadConfig config) { - this(file, DEFAULT_CHARSET, config); - } - - /** - * 构造,默认{@link #DEFAULT_CHARSET}编码 - * - * @param path CSV文件路径,null表示不设置路径 - * @param config 配置项,null表示默认配置 - * @since 5.0.4 - */ - public CsvReader(Path path, CsvReadConfig config) { - this(path, DEFAULT_CHARSET, config); - } - - /** - * 构造 - * - * @param file CSV文件路径,null表示不设置路径 - * @param charset 编码 - * @param config 配置项,null表示默认配置 - * @since 5.0.4 - */ - public CsvReader(File file, Charset charset, CsvReadConfig config) { - this(FileUtil.getReader(file, charset), config); - } - - /** - * 构造 - * - * @param path CSV文件路径,null表示不设置路径 - * @param charset 编码 - * @param config 配置项,null表示默认配置 - * @since 5.0.4 - */ - public CsvReader(Path path, Charset charset, CsvReadConfig config) { - this(FileUtil.getReader(path, charset), config); - } - - /** - * 构造 - * - * @param reader {@link Reader},null表示不设置默认reader - * @param config 配置项,null表示默认配置 - * @since 5.0.4 - */ - public CsvReader(Reader reader, CsvReadConfig config) { - super(config); - this.reader = reader; - } - //--------------------------------------------------------------------------------------------- Constructor end - - /** - * 读取CSV文件,此方法只能调用一次
- * 调用此方法的前提是构造中传入文件路径或Reader - * - * @return {@link CsvData},包含数据列表和行信息 - * @throws IORuntimeException IO异常 - */ - public CsvData read() throws IORuntimeException { - return read(this.reader, false); - } - - /** - * 读取CSV数据,此方法只能调用一次
- * 调用此方法的前提是构造中传入文件路径或Reader - * - * @param rowHandler 行处理器,用于一行一行的处理数据 - * @throws IORuntimeException IO异常 - * @since 5.0.4 - */ - public void read(CsvRowHandler rowHandler) throws IORuntimeException { - read(this.reader, false, rowHandler); - } - - /** - * 根据Reader创建{@link Stream},以便使用stream方式读取csv行 - * - * @return {@link Stream} - * @since 5.7.14 - */ - public Stream stream() { - return StreamSupport.stream(spliterator(), false) - .onClose(() -> { - try { - close(); - } catch (final IOException e) { - throw new IORuntimeException(e); - } - }); - } - - @Override - public Iterator iterator() { - return parse(this.reader); - } - - @Override - public void close() throws IOException { - IoUtil.close(this.reader); - } -} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRow.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRow.java deleted file mode 100644 index f3292ec..0000000 --- a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRow.java +++ /dev/null @@ -1,264 +0,0 @@ -package cn.octopusyan.dmt.utils.csv; - -import cn.hutool.core.bean.BeanUtil; -import cn.hutool.core.lang.Assert; - -import java.util.*; - -/** - * CSV中一行的表示 - * - * @author Looly - */ -public final class CsvRow implements List { - - /** - * 原始行号 - */ - private final long originalLineNumber; - - final Map headerMap; - final List fields; - - /** - * 构造 - * - * @param originalLineNumber 对应文件中的第几行 - * @param headerMap 标题Map - * @param fields 数据列表 - */ - public CsvRow(long originalLineNumber, Map headerMap, List fields) { - Assert.notNull(fields, "fields must be not null!"); - this.originalLineNumber = originalLineNumber; - this.headerMap = headerMap; - this.fields = fields; - } - - /** - * 获取原始行号,多行情况下为首行行号。忽略注释行 - * - * @return the original line number 行号 - */ - public long getOriginalLineNumber() { - return originalLineNumber; - } - - /** - * 获取标题对应的字段内容 - * - * @param name 标题名 - * @return 字段值,null表示无此字段值 - * @throws IllegalStateException CSV文件无标题行抛出此异常 - */ - public String getByName(String name) { - Assert.notNull(this.headerMap, "No header available!"); - - final Integer col = headerMap.get(name); - if (col != null) { - return get(col); - } - return null; - } - - /** - * 获取本行所有字段值列表 - * - * @return 字段值列表 - */ - public List getRawList() { - return fields; - } - - /** - * 获取标题与字段值对应的Map - * - * @return 标题与字段值对应的Map - * @throws IllegalStateException CSV文件无标题行抛出此异常 - */ - public Map getFieldMap() { - if (headerMap == null) { - throw new IllegalStateException("No header available"); - } - - final Map fieldMap = new LinkedHashMap<>(headerMap.size(), 1); - String key; - Integer col; - String val; - for (final Map.Entry header : headerMap.entrySet()) { - key = header.getKey(); - col = headerMap.get(key); - val = null == col ? null : get(col); - fieldMap.put(key, val); - } - - return fieldMap; - } - - /** - * 一行数据转换为Bean对象 - * - * @param Bean类型 - * @param clazz bean类 - * @return Bean - * @since 5.3.6 - */ - public T toBean(Class clazz) { - return BeanUtil.toBeanIgnoreError(getFieldMap(), clazz); - } - - /** - * 获取字段格式 - * - * @return 字段格式 - */ - public int getFieldCount() { - return fields.size(); - } - - @Override - public int size() { - return this.fields.size(); - } - - @Override - public boolean isEmpty() { - return this.fields.isEmpty(); - } - - @Override - public boolean contains(Object o) { - return this.fields.contains(o); - } - - @Override - public Iterator iterator() { - return this.fields.iterator(); - } - - @Override - public Object[] toArray() { - return this.fields.toArray(); - } - - @Override - public T[] toArray(T[] a) { - //noinspection SuspiciousToArrayCall - return this.fields.toArray(a); - } - - @Override - public boolean add(String e) { - return this.fields.add(e); - } - - @Override - public boolean remove(Object o) { - return this.fields.remove(o); - } - - @Override - public boolean containsAll(Collection c) { - return this.fields.containsAll(c); - } - - @Override - public boolean addAll(Collection c) { - return this.fields.addAll(c); - } - - @Override - public boolean addAll(int index, Collection c) { - return this.fields.addAll(index, c); - } - - @Override - public boolean removeAll(Collection c) { - return this.fields.removeAll(c); - } - - @Override - public boolean retainAll(Collection c) { - return this.fields.retainAll(c); - } - - @Override - public void clear() { - this.fields.clear(); - } - - @Override - public String get(int index) { - return index >= fields.size() ? null : fields.get(index); - } - - @Override - public String set(int index, String element) { - return this.fields.set(index, element); - } - - @Override - public void add(int index, String element) { - this.fields.add(index, element); - } - - @Override - public String remove(int index) { - return this.fields.remove(index); - } - - @Override - public int indexOf(Object o) { - return this.fields.indexOf(o); - } - - @Override - public int lastIndexOf(Object o) { - return this.fields.lastIndexOf(o); - } - - @Override - public ListIterator listIterator() { - return this.fields.listIterator(); - } - - @Override - public ListIterator listIterator(int index) { - return this.fields.listIterator(index); - } - - @Override - public List subList(int fromIndex, int toIndex) { - return this.fields.subList(fromIndex, toIndex); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("CsvRow{"); - sb.append("originalLineNumber="); - sb.append(originalLineNumber); - sb.append(", "); - - sb.append("fields="); - if (headerMap != null) { - sb.append('{'); - for (final Iterator> it = getFieldMap().entrySet().iterator(); it.hasNext(); ) { - - final Map.Entry entry = it.next(); - sb.append(entry.getKey()); - sb.append('='); - if (entry.getValue() != null) { - sb.append(entry.getValue()); - } - if (it.hasNext()) { - sb.append(", "); - } - } - sb.append('}'); - } else { - sb.append(fields.toString()); - } - - sb.append('}'); - return sb.toString(); - } -} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRowHandler.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRowHandler.java deleted file mode 100644 index 418ce25..0000000 --- a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvRowHandler.java +++ /dev/null @@ -1,18 +0,0 @@ -package cn.octopusyan.dmt.utils.csv; - -/** - * CSV的行处理器,实现此接口用于按照行处理数据 - * - * @author Looly - * @since 5.0.4 - */ -@FunctionalInterface -public interface CsvRowHandler { - - /** - * 处理行数据 - * - * @param row 行数据 - */ - void handle(CsvRow row); -} diff --git a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvUtil.java b/src/main/java/cn/octopusyan/dmt/utils/csv/CsvUtil.java deleted file mode 100644 index dcbe0ad..0000000 --- a/src/main/java/cn/octopusyan/dmt/utils/csv/CsvUtil.java +++ /dev/null @@ -1,144 +0,0 @@ -package cn.octopusyan.dmt.utils.csv; - -import cn.hutool.core.text.csv.CsvWriteConfig; -import cn.hutool.core.text.csv.CsvWriter; - -import java.io.File; -import java.io.Reader; -import java.io.Writer; -import java.nio.charset.Charset; - -/** - * CSV工具 - * - * @author looly - * @since 4.0.5 - */ -public class CsvUtil { - - //----------------------------------------------------------------------------------------------------------- Reader - - /** - * 获取CSV读取器,调用此方法创建的Reader须自行指定读取的资源 - * - * @param config 配置, 允许为空. - * @return {@link CsvReader} - */ - public static CsvReader getReader(CsvReadConfig config) { - return new CsvReader(config); - } - - /** - * 获取CSV读取器,调用此方法创建的Reader须自行指定读取的资源 - * - * @return {@link CsvReader} - */ - public static CsvReader getReader() { - return new CsvReader(); - } - - /** - * 获取CSV读取器 - * - * @param reader {@link Reader} - * @param config 配置, {@code null}表示默认配置 - * @return {@link CsvReader} - * @since 5.7.14 - */ - public static CsvReader getReader(Reader reader, CsvReadConfig config) { - return new CsvReader(reader, config); - } - - /** - * 获取CSV读取器 - * - * @param reader {@link Reader} - * @return {@link CsvReader} - * @since 5.7.14 - */ - public static CsvReader getReader(Reader reader) { - return getReader(reader, null); - } - - //----------------------------------------------------------------------------------------------------------- Writer - - /** - * 获取CSV生成器(写出器),使用默认配置,覆盖已有文件(如果存在) - * - * @param filePath File CSV文件路径 - * @param charset 编码 - * @return {@link CsvWriter} - */ - public static CsvWriter getWriter(String filePath, Charset charset) { - return new CsvWriter(filePath, charset); - } - - /** - * 获取CSV生成器(写出器),使用默认配置,覆盖已有文件(如果存在) - * - * @param file File CSV文件 - * @param charset 编码 - * @return {@link CsvWriter} - */ - public static CsvWriter getWriter(File file, Charset charset) { - return new CsvWriter(file, charset); - } - - /** - * 获取CSV生成器(写出器),使用默认配置 - * - * @param filePath File CSV文件路径 - * @param charset 编码 - * @param isAppend 是否追加 - * @return {@link CsvWriter} - */ - public static CsvWriter getWriter(String filePath, Charset charset, boolean isAppend) { - return new CsvWriter(filePath, charset, isAppend); - } - - /** - * 获取CSV生成器(写出器),使用默认配置 - * - * @param file File CSV文件 - * @param charset 编码 - * @param isAppend 是否追加 - * @return {@link CsvWriter} - */ - public static CsvWriter getWriter(File file, Charset charset, boolean isAppend) { - return new CsvWriter(file, charset, isAppend); - } - - /** - * 获取CSV生成器(写出器) - * - * @param file File CSV文件 - * @param charset 编码 - * @param isAppend 是否追加 - * @param config 写出配置,null则使用默认配置 - * @return {@link CsvWriter} - */ - public static CsvWriter getWriter(File file, Charset charset, boolean isAppend, CsvWriteConfig config) { - return new CsvWriter(file, charset, isAppend, config); - } - - /** - * 获取CSV生成器(写出器) - * - * @param writer Writer - * @return {@link CsvWriter} - */ - public static CsvWriter getWriter(Writer writer) { - return new CsvWriter(writer); - } - - /** - * 获取CSV生成器(写出器) - * - * @param writer Writer - * @param config 写出配置,null则使用默认配置 - * @return {@link CsvWriter} - */ - public static CsvWriter getWriter(Writer writer, CsvWriteConfig config) { - return new CsvWriter(writer, config); - } -}