获取本地压缩包,解压后根据文件名称及类型,对读取的文件内容进行业务处理。
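<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>aliyun-java-sdk-core</artifactId>
    <version>3.4.0</version>
</dependency>
<dependency>
    <groupId>software.amazon.awssdk</groupId>
    <artifactId>aws-sdk-java</artifactId>
    <version>2.17.0</version>
</dependency>
<dependency>
    <groupId>com.baidu.aip</groupId>
    <artifactId>java-sdk</artifactId>
    <version>4.11.3</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-simple</artifactId>
</dependency>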
1、控制层方法
@GetMapping(value = "/localZipFile")
public Result localZipFile(){
String filePath = "C:\\Users\\Administrator\\Desktop\\11.zip";
List list = new ArrayList<>();
try {
ZipFile zipFile = new ZipFile(filePath);
Enumeration extends ZipEntry> entries = zipFile.getEntries();
//获取zip包下文件名
while (entries.hasMoreElements()) {
list.add(entries.nextElement().getName());
}
String packFileStr = "C:\\Users\\Administrator\\Desktop\\zip";
File file = new File(filePath);
String packFilePath = packFileStr + File.separator;
//解压到指定路径
UnPackeUtil.unPackZip(file, null,packFilePath);
File readFileDir = new File(packFilePath);
List strings = new ArrayList<>();
//获取文件夹下文件集合
File[] files = readFileDir.listFiles();
for (File file1 : files) {
String savePath = MeFileUtils.uploadLocal(file1,"");
FileInputStream inputStream = new FileInputStream(file1);
String fileExtension =MeFileUtils. getFileExtension(file1.getName());
//返回读取的文件内容
String fileContent = MeFileUtils.readGsFile(inputStream,fileExtension,file1);
strings.add(savePath);
strings.add(fileContent);
}
return Result.OK(strings);
} catch (IOException e) {
e.printStackTrace();
return Result.error(e.getMessage());
}
2、MeFileUtils工具类
//上传
public class MeFileUtils{
public static String uploadLocal(File file,String bizPath){
try {
String ctxPath = uploadpath;
String fileName = null;
File file = new File(ctxPath + File.separator + bizPath + File.separator );
if (!file.exists()) {
file.mkdirs();// 创建文件根目录
}
// 获取文件名
String orgName = mf.getName();
orgName = CommonUtils.getFileName(orgName);
if(orgName.indexOf(".")!=-1){
fileName = orgName.substring(0, orgName.lastIndexOf(".")) + "_" + System.currentTimeMillis() + orgName.substring(orgName.lastIndexOf("."));
}else{
fileName = orgName+ "_" + System.currentTimeMillis();
}
String savePath = file.getPath() + File.separator + fileName;
File savefile = new File(savePath);
FileCopyUtils.copy(mf, savefile);
String dbpath = null;
if(oConvertUtils.isNotEmpty(bizPath)){
dbpath = bizPath + File.separator + fileName;
}else{
dbpath = fileName;
}
if (dbpath.contains("\\")) {
dbpath = dbpath.replace("\\", "/");
}
return dbpath;
} catch (IOException e) {
log.error(e.getMessage(), e);
}
return "";
}
//获取文件名
public String getFileExtension(String filename) {
int dotIndex = filename.lastIndexOf(".");
if (dotIndex > 0 && dotIndex < filename.length() - 1) {
return filename.substring(dotIndex + 1).toLowerCase();
}
return "";
}
//获取文件内容
public String readGsFile(FileInputStream inputStream,String fileExtension,File txtFile) {
try {
if (fileExtension.equalsIgnoreCase("doc") || fileExtension.equalsIgnoreCase("docx")) {
// 处理Word文档
XWPFDocument document = new XWPFDocument(inputStream);
// 读取每个段落的文本内容
StringBuilder content = new StringBuilder();
for (XWPFParagraph paragraph : document.getParagraphs()) {
for (XWPFRun run : paragraph.getRuns()) {
content.append(run.text());
}
}
// 关闭文档
document.close();
return content.toString();
}else if(fileExtension.equalsIgnoreCase("txt")){
StringBuffer buffer = new StringBuffer();
// 创建Scanner对象来读取文件内容
Scanner scanner = new Scanner(txtFile);
// 逐行读取文件内容并输出
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
System.out.println(line);
buffer.append(line).append(",");
}
// 关闭Scanner对象
scanner.close();
return buffer.toString();
} else if (fileExtension.equalsIgnoreCase("xls") || fileExtension.equalsIgnoreCase("xlsx")) {
// 处理Excel文档
XSSFWorkbook workbook = new XSSFWorkbook(inputStream);
return "";
} else if (fileExtension.equalsIgnoreCase("pdf")) {
// 处理PDF文档
PDDocument document = PDDocument.load(inputStream);
// 创建 PDFTextStripper 对象
PDFTextStripper textStripper = new PDFTextStripper();
// 读取文档内容
String content = textStripper.getText(document);
document.close();
return content;
}else if (fileExtension.equalsIgnoreCase("jpg") || fileExtension.equalsIgnoreCase("png")) {
// 处理图片
try {
// 调用OCR工具类识别文件内容
String result = BaiduOCRUtil.recognizeFile(txtFile.getAbsolutePath());
return result;
} catch (Exception e) {
e.printStackTrace();
return "";
}
} else {
// 其他文件格式
inputStream.close();
return "";
}
} catch (IOException e) {
e.printStackTrace();
return "";
}
}
}
3、BaiduOCRUtil工具类
public class BaiduOCRUtil {
private static final String APP_ID = "";
private static final String API_KEY = "";
private static final String SECRET_KEY = "";
public static String recognizeFile(String filePath) {
AipOcr client = new AipOcr(APP_ID, API_KEY, SECRET_KEY);
// 设置可选参数
HashMap options = new HashMap<>();
options.put("language_type", "CHN_ENG");
options.put("detect_direction", "true");
options.put("detect_language", "true");
options.put("probability", "true");
// 调用百度云OCR服务识别文件内容
JSONObject response = client.basicGeneral(filePath, options);
// 解析识别结果
StringBuilder result = new StringBuilder();
JSONArray wordsArray = response.getJSONArray("words_result");
for (int i = 0; i < wordsArray.length(); i++) {
JSONObject wordsObject = wordsArray.getJSONObject(i);
String words = wordsObject.getString("words");
result.append(words).append("\n");
}
return result.toString();
}
}
一个在学习的开发者,勿喷,欢迎交流