目录
最近在编写一个通用、可配置的实时ETL工具,即通过Flink把Kafka中JSON格式的数据解析后写入关系型数据库。业务要求是:每新增一种JSON格式的业务数据,只需在配置文件中配置JSON字段与关系型数据库表中列的映射关系,运行作业即可入库。每一个JSON格式的API对应一个CSV配置文件,因此需要读取CSV文件来获取配置。本文记录读取CSV的过程,顺便记录读取TXT的过程。
使用com.csvreader.CsvReader时需要在pom.xml中引用对应的jar包
- <!--csv文件操作-->
- <dependency>
- <groupId>net.sourceforge.javacsv</groupId>
- <artifactId>javacsv</artifactId>
- <version>2.0</version>
- </dependency>
- package util;
- import com.csvreader.CsvReader;
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.InputStreamReader;
- import java.nio.charset.Charset;
- import java.util.ArrayList;
-
- public class ReadFileTool {
-
- //读取文件
- public static String readTextFile(String jsonFile) {
- StringBuilder stringBuilder = new StringBuilder();
- try {
- //01.FileInputStream(字节流) 实现了InputStream接口,用来读取文件中的字节流,参数是文件或者文件路径+文件名称
- FileInputStream fileInputStream = new FileInputStream(jsonFile);
- //02.将 fileInputStream(字节流) 流作为参数,转为InputStreamReader(字符流)
- InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, "UTF-8");
- //03.将 字符流(参数)转为字符串流,带缓冲的流读取,默认缓冲区8k
- BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
- String tempString;
- while ((tempString = bufferedReader.readLine()) != null)
- {
- // 将字符串 添加到 stringBuilder中
- stringBuilder.append(tempString);
- }
- bufferedReader.close();
- } catch (Exception e) {
- e.printStackTrace();
- }
- return stringBuilder.toString();
- }
-
- //通过BufferedReader读取CSV
- public static ArrayList readCsvFileFromJava(String csvFile,String encoding) {
-
- ArrayList<String[]> csvArrList=new ArrayList<String[]>();
- StringBuilder stringBuilder = new StringBuilder();
- try {
- //01.FileInputStream(字节流) 实现了InputStream接口,用来读取文件中的字节流,参数是文件或者文件路径+文件名称
- FileInputStream fileInputStream = new FileInputStream(csvFile);
- //02.将 fileInputStream(字节流) 流作为参数,转为InputStreamReader(字符流)
- InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, encoding);
- //03.将 字符流(参数)转为字符串流,带缓冲的流读取,默认缓冲区8k
- BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
- String line;
- while ((line = bufferedReader.readLine()) != null)
- {
- // System.out.println(line);
- String item[] = line.split(",",-1);//CSV格式文件为逗号分隔符文件,这里根据逗号切分,-1表示获取所有空值
- csvArrList.add(item);
- }
- bufferedReader.close();
- } catch (Exception e) {
- e.printStackTrace();
- }
- return csvArrList;
- }
-
- //通过com.csvreader.CsvReader 读取CSV
- public static ArrayList readCsvFileFromCsvReader(String csvFile,String encoding) {
- ArrayList<String[]> csvArrList=new ArrayList<String[]>();
- try {
- CsvReader reader = new CsvReader(csvFile, ',', Charset.forName(encoding));
- reader.readHeaders();
- String[] headArray = reader.getHeaders();//获取标题
- for(String a:headArray){
- System.out.print(a);
- }
- System.out.println();
- System.out.println("---------");
-
- while (reader.readRecord()) {
- // System.out.println(Arrays.asList(reader.getValues()));
- // 按行读取,并把每一行的数据添加到list集合
- csvArrList.add(reader.getValues());
- }
- reader.close();
- } catch (Exception e) {
- e.printStackTrace();
- }
- return csvArrList;
- }
-
- //测试
- public static void main(String[] args) {
-
- String userDir = System.getProperty("user.dir");
- String fileDir = userDir + File.separator + "sourceFile" + File.separator;
- String cfgFileName = "dwd_op_event.csv";
- String csvFullFileName = fileDir + cfgFileName;
- ArrayList<String []> csvArrList =readCsvFileFromCsvReader(csvFullFileName,"GBK");
-
- }
-
- }