package pdf.txt;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
/**
 * Extracts the text of a PDF file with Apache PDFBox and saves that text to a
 * plain-text file.
 *
 * <p>All public methods report failures by printing the exception and then
 * returning normally (an empty string for extraction, the target path for
 * saving); they do not propagate I/O errors to the caller.
 */
public class PDF {

    /** PDF processed by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_PDF_PATH =
            "D:/Project/e-Statement/estatement_pdf/2021_07_09_estatement/8000054710_est_9ec4a09254a67c1690837ef62f64f9e9.pdf";

    /** Fixed output location used by {@link #save_result_to_txt(String)}. */
    private static final String DEFAULT_TXT_PATH = "D:/PDF.txt";

    private static final String PDF_SUFFIX = ".pdf";
    private static final String TXT_SUFFIX = ".txt";

    /**
     * Extracts the text of one PDF, prints it to standard output and saves it
     * to {@value #DEFAULT_TXT_PATH}.
     *
     * @param args optional; {@code args[0]} is the PDF to process. When absent,
     *             the built-in default path is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        PDF p = new PDF();
        String content = p.Get_PDF_Content(path);
        System.out.println(content);
        p.save_result_to_txt(content);
    }

    /**
     * Writes the trimmed {@code content} to the fixed file
     * {@value #DEFAULT_TXT_PATH}, replacing any previous contents.
     *
     * @param content text to store; {@code null} is written as an empty file
     * @return the path of the target file, returned even if writing failed
     */
    public String save_result_to_txt(String content) {
        writeText(new File(DEFAULT_TXT_PATH), content);
        return DEFAULT_TXT_PATH;
    }

    /**
     * Extracts the text of the PDF at {@code path} and writes it next to the
     * PDF, using the same file name with a {@code .txt} extension.
     *
     * <p>The output name always differs from the input name: a trailing
     * {@code .pdf} (in any letter case) is replaced by {@code .txt}, and a name
     * without that extension simply gets {@code .txt} appended. This keeps the
     * source file from being overwritten by its own extracted text.
     *
     * @param path location of the PDF file
     * @return the path of the text file, returned even if writing failed
     */
    public String save_result_to_txt_with_path(String path) {
        String content = Get_PDF_Content(path);
        File source = new File(path);
        String txtName = toTxtName(source.getName());
        // A bare file name has no parent directory; write relative to the working directory then.
        File parent = source.getParentFile();
        String filepath = (parent == null) ? txtName : parent.toString() + "/" + txtName;
        writeText(new File(filepath), content);
        return filepath;
    }

    /**
     * Reads the text of a PDF by driving {@link PDFParser} directly over a
     * {@link RandomAccessBufferedFileInputStream}.
     *
     * @param path location of the PDF file
     * @return the extracted text, or an empty string if the file could not be
     *         opened, parsed or stripped (the exception's stack trace is printed)
     */
    public String read_PDF(String path) {
        String text = "";
        File file = new File(path);
        try (FileInputStream in = new FileInputStream(file);
             RandomAccessRead randomAccessRead = new RandomAccessBufferedFileInputStream(in)) {
            PDFParser parser = new PDFParser(randomAccessRead);
            parser.parse();
            // The document must be closed as well, otherwise its resources stay open.
            try (PDDocument pdDocument = parser.getPDDocument()) {
                PDFTextStripper stripper = new PDFTextStripper();
                text = stripper.getText(pdDocument);
            }
        } catch (Exception e) {
            // Boundary catch: any failure while reading the PDF yields the text gathered so far.
            e.printStackTrace();
        }
        return text;
    }

    /**
     * Loads the PDF at {@code path} and returns the text of all its pages, with
     * the stripper configured to sort text by position.
     *
     * @param path location of the PDF file
     * @return the extracted text, or an empty string if loading or text
     *         extraction failed (the exception is printed to standard output)
     */
    public String Get_PDF_Content(String path) {
        File pdfFile = new File(path);
        String content = "";
        // try-with-resources closes the document even when extraction throws.
        try (PDDocument document = PDDocument.load(pdfFile)) {
            // Number of pages in the document.
            int pages = document.getNumberOfPages();
            PDFTextStripper stripper = new PDFTextStripper();
            // Emit the text sorted by its position on the page.
            stripper.setSortByPosition(true);
            stripper.setStartPage(1);
            stripper.setEndPage(pages);
            content = stripper.getText(document);
        } catch (Exception e) {
            // Boundary catch: any failure while reading the PDF is reported, not rethrown.
            System.out.println(e);
        }
        return content;
    }

    /** Returns {@code pdfName} with a trailing ".pdf" (any case) swapped for ".txt", else ".txt" appended. */
    private static String toTxtName(String pdfName) {
        int stemLength = pdfName.length() - PDF_SUFFIX.length();
        // regionMatches returns false for a negative offset, so short names are handled too.
        if (pdfName.regionMatches(true, stemLength, PDF_SUFFIX, 0, PDF_SUFFIX.length())) {
            return pdfName.substring(0, stemLength) + TXT_SUFFIX;
        }
        return pdfName + TXT_SUFFIX;
    }

    /** Writes the trimmed {@code content} to {@code target} as UTF-8, printing (not throwing) I/O errors. */
    private static void writeText(File target, String content) {
        String text = (content == null) ? "" : content.trim();
        // FileOutputStream creates or truncates the file itself; no createNewFile() is needed.
        try (FileOutputStream out = new FileOutputStream(target)) {
            // Explicit charset: the platform default may be unable to encode the extracted text.
            out.write(text.getBytes(StandardCharsets.UTF_8));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}