上一篇我们介绍了用 Java 识别电子发票中发票税号等信息的几种可用方案,最后博主选取了识别文件二维码的方式。文章最后也提到,这种方式有局限性:取到的信息有限,而且 OFD 格式的文件还得另想办法。接下来,我们就说一下怎么解决这个问题,以及如何识别 OFD 格式发票文件中的内容:
想看上一篇思路的请看博主的这篇文章:
java实现电子发票中的发票税号等信息识别的几种可用方案
看看这一篇发票识别的做法:
先看一下效果:
这是原图:
这个是识别后的效果:
完全一致。
不卖关子了,经过全网寻找,下面这个开源项目是比较靠谱的一个:
开源地址:https://github.com/sanluan/einvoice
电子发票识别,可识别 电子普票 电子专票 文件类型支持 pdf ofd
在线识别页面 http://www.heycore.com/invoice.html
如果需要打开ofd文件,再推荐个网站:
https://inv-veri.chinatax.gov.cn/xgxz.html
直接在国家税务总局网站下载,这样ofd格式就可以打开使用了。
部分代码信息:
接口类:InvoiceController
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.dom4j.DocumentException;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import com.sanluan.einvoice.service.Invoice;
import com.sanluan.einvoice.service.OfdInvoiceExtractor;
import com.sanluan.einvoice.service.PdfInvoiceExtractor;
@RestController
@RequestMapping("/invoice")
public class InvoiceController {
@Value("${backupPath}")
private String backupPath;
private static ThreadLocal<Map<String, DateFormat>> threadLocal = new ThreadLocal<>();
private static final String FILE_NAME_FORMAT_STRING = "yyyy/MM-dd/HH-mm-ssSSSS";
public static final RequestConfig defaultRequestConfig = RequestConfig.custom().setSocketTimeout(5000).setConnectTimeout(5000)
.setConnectionRequestTimeout(5000).build();
/**
* @param pattern
* @return date format
*/
public static DateFormat getDateFormat(String pattern) {
Map<String, DateFormat> map = threadLocal.get();
DateFormat format = null;
if (null == map) {
map = new HashMap<>();
format = new SimpleDateFormat(pattern);
map.put(pattern, format);
threadLocal.set(map);
} else {
format = map.computeIfAbsent(pattern, k -> new SimpleDateFormat(k));
}
return format;
}
@RequestMapping(value = "/extrat")
public Invoice extrat(@RequestParam(value = "file", required = false) MultipartFile file, String url) {
String fileName = getDateFormat(FILE_NAME_FORMAT_STRING).format(new Date());
File dest = null;
boolean ofd = false;
if (null != file && !file.isEmpty()) {
if (file.getOriginalFilename().toLowerCase().endsWith(".ofd")) {
ofd = true;
dest = new File(backupPath, fileName + ".ofd");
} else {
dest = new File(backupPath, fileName + ".pdf");
}
dest.getParentFile().mkdirs();
try {
FileUtils.copyInputStreamToFile(file.getInputStream(), dest);
} catch (IOException e) {
}
} else if (null != url) {
if (url.toLowerCase().endsWith(".ofd")) {
ofd = true;
dest = new File(backupPath, fileName + ".ofd");
} else {
dest = new File(backupPath, fileName + ".pdf");
}
dest.getParentFile().mkdirs();
try (CloseableHttpClient httpclient = HttpClients.custom().setDefaultRequestConfig(defaultRequestConfig).build();) {
HttpUriRequest request = new HttpGet(url);
try (CloseableHttpResponse response = httpclient.execute(request)) {
HttpEntity entity = response.getEntity();
if (null != entity) {
BufferedInputStream inputStream = new BufferedInputStream(entity.getContent());
FileUtils.copyInputStreamToFile(inputStream, dest);
EntityUtils.consume(entity);
}
}
} catch (Exception e) {
}
}
Invoice result = null;
try {
if (null != dest) {
if (ofd) {
result = OfdInvoiceExtractor.extract(dest);
} else {
result = PdfInvoiceExtractor.extract(dest);
}
if (null != result.getAmount()) {
dest.delete();
}
} else {
result = new Invoice();
result.setTitle("error");
}
} catch (IOException | DocumentException e) {
e.printStackTrace();
result = new Invoice();
result.setTitle("error");
}
return result;
}
}
pdf解析类:
import java.awt.Rectangle;
import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;
/**
* 专用于处理电子发票识别的类
*
*/
public class PdfInvoiceExtractor {
/**
 * Reads a PDF electronic invoice and returns the recognised fields.
 * <p>
 * Header fields and totals are taken from the page text with regular expressions; buyer, seller,
 * password area and detail rows are read from page regions whose bounds are derived from the
 * coordinates of fixed keywords found on the first page.
 *
 * @param file
 *            the PDF file to parse
 * @return the populated invoice
 * @throws IOException
 *             if the file cannot be read or a keyword needed to locate a region is missing
 */
public static Invoice extract(File file) throws IOException {
    Invoice invoice = new Invoice();
    PDFTextStripperByArea stripper = new PDFTextStripperByArea();
    stripper.setSortByPosition(true);
    PDFTextStripperByArea detailStripper = new PDFTextStripperByArea();
    detailStripper.setSortByPosition(true);
    // try-with-resources: the document is closed even when parsing fails half-way
    try (PDDocument doc = PDDocument.load(file)) {
        PDPage firstPage = doc.getPage(0);
        int pageWidth = Math.round(firstPage.getCropBox().getWidth());
        PDFTextStripper textStripper = new PDFTextStripper();
        textStripper.setSortByPosition(true);
        String fullText = textStripper.getText(doc);
        if (firstPage.getRotation() != 0) {
            pageWidth = Math.round(firstPage.getCropBox().getHeight());
        }
        // Map full-width parentheses (U+FF08, U+FF09) and the full-width yen sign (U+FFE5) to the
        // characters the patterns below expect. Plain character replacement is used because a lone
        // "(" or ")" is not a valid regular expression for replaceAll.
        String allText = replace(fullText).replace('\uFF08', '(').replace('\uFF09', ')').replace('\uFFE5', '\u00A5');
        {
            String reg = "机器编号:(?<machineNumber>\\d{12})|发票代码:(?<code>\\d{12})|发票号码:(?<number>\\d{8})|:(?<date>\\d{4}年\\d{2}月\\d{2}日)"
                    + "|校验码:(?<checksum>\\d{20}|\\S{4,})";
            Pattern pattern = Pattern.compile(reg);
            Matcher matcher = pattern.matcher(allText);
            while (matcher.find()) {
                if (matcher.group("machineNumber") != null) {
                    invoice.setMachineNumber(matcher.group("machineNumber"));
                } else if (matcher.group("code") != null) {
                    invoice.setCode(matcher.group("code"));
                } else if (matcher.group("number") != null) {
                    invoice.setNumber(matcher.group("number"));
                } else if (matcher.group("date") != null) {
                    invoice.setDate(matcher.group("date"));
                } else if (matcher.group("checksum") != null) {
                    invoice.setChecksum(matcher.group("checksum"));
                }
            }
        }
        {
            String reg = "合计¥?(?<amount>[^ \\f\\n\\r\\t\\v\\*]*)(?:¥?(?<taxAmount>\\S*)|\\*+)\\s";
            Pattern pattern = Pattern.compile(reg);
            Matcher matcher = pattern.matcher(allText);
            if (matcher.find()) {
                try {
                    invoice.setAmount(new BigDecimal(matcher.group("amount")));
                } catch (Exception e) {
                    // deliberately ignored: the amount stays null so the fallback pattern below runs
                }
                try {
                    invoice.setTaxAmount(new BigDecimal(matcher.group("taxAmount")));
                } catch (Exception e) {
                    invoice.setTaxAmount(new BigDecimal(0));
                }
            }
        }
        if (null == invoice.getAmount()) {
            // fallback on the raw text, where the two characters of the label may be separated by spaces
            String reg = "合\\u0020*计\\u0020*¥?(?<amount>[^ ]*)\\u0020+¥?(?:(?<taxAmount>\\S*)|\\*+)\\s";
            Pattern pattern = Pattern.compile(reg);
            Matcher matcher = pattern.matcher(fullText);
            if (matcher.find()) {
                try {
                    invoice.setAmount(new BigDecimal(matcher.group("amount")));
                } catch (Exception e) {
                    invoice.setAmount(new BigDecimal(0));
                }
                try {
                    invoice.setTaxAmount(new BigDecimal(matcher.group("taxAmount")));
                } catch (Exception e) {
                    invoice.setTaxAmount(new BigDecimal(0));
                }
            }
        }
        {
            String reg = "价税合计\\u0028大写\\u0029(?<amountString>\\S*)\\u0028小写\\u0029¥?(?<amount>\\S*)\\s";
            Pattern pattern = Pattern.compile(reg);
            Matcher matcher = pattern.matcher(allText);
            if (matcher.find()) {
                invoice.setTotalAmountString(matcher.group("amountString"));
                try {
                    invoice.setTotalAmount(new BigDecimal(matcher.group("amount")));
                } catch (Exception e) {
                    invoice.setTotalAmount(new BigDecimal(0));
                }
            }
        }
        {
            String reg = "收款人:(?<payee>\\S*)复核:(?<reviewer>\\S*)开票人:(?<drawer>\\S*)销售方";
            Pattern pattern = Pattern.compile(reg);
            Matcher matcher = pattern.matcher(allText);
            if (matcher.find()) {
                invoice.setPayee(matcher.group("payee"));
                invoice.setReviewer(matcher.group("reviewer"));
                invoice.setDrawer(matcher.group("drawer"));
            }
            if (allText.indexOf("通行费") > 0 && allText.indexOf("车牌号") > 0) {
                invoice.setType("通行费");
            }
            Pattern type00Pattern = Pattern.compile("(?<p>\\S*)通发票");
            Matcher m00 = type00Pattern.matcher(allText);
            if (m00.find()) {
                invoice.setTitle(m00.group("p").replaceAll("(?:国|统|一|发|票|监|制)", "") + "通发票");
                if (null == invoice.getType()) {
                    invoice.setType("普通发票");
                }
            } else {
                Pattern type01Pattern = Pattern.compile("(?<p>\\S*)用发票");
                Matcher m01 = type01Pattern.matcher(allText);
                if (m01.find()) {
                    invoice.setTitle(m01.group("p").replaceAll("(?:国|统|一|发|票|监|制)", "") + "用发票");
                    if (null == invoice.getType()) {
                        invoice.setType("专用发票");
                    }
                }
            }
        }
        PDFKeyWordPosition kwp = new PDFKeyWordPosition();
        Map<String, List<Position>> positionListMap = kwp
                .getCoordinate(Arrays.asList("机器编号", "税率", "价税合计", "合计", "开票日期", "规格型号", "车牌号", "开户行及账号", "密", "码", "区"), doc);
        {
            Position machineNumber;
            if (!positions(positionListMap, "机器编号").isEmpty()) {
                machineNumber = positionListMap.get("机器编号").get(0);
            } else {
                // no machine number on the page: use the issue date, shifted down by 30
                machineNumber = firstPosition(positionListMap, "开票日期");
                machineNumber.setY(machineNumber.getY() + 30);
            }
            Position taxRate = firstPosition(positionListMap, "税率");
            Position totalAmount = firstPosition(positionListMap, "价税合计");
            Position amount = firstPosition(positionListMap, "合计");
            Position model;
            if (!positions(positionListMap, "规格型号").isEmpty()) {
                model = positionListMap.get("规格型号").get(0);
            } else {
                model = firstPosition(positionListMap, "车牌号");
                model.setX(model.getX() - 15);
            }
            List<Position> account = positions(positionListMap, "开户行及账号");
            Position buyer;
            Position seller;
            if (account.size() < 2) {
                // fixed fallback coordinates when the two account labels were not both found
                buyer = new Position(51, 122);
                seller = new Position(51, 341);
            } else {
                buyer = account.get(0);
                seller = account.get(1);
            }
            // x coordinate of the password-area label: its three characters are searched separately
            // and accepted when their x coordinates lie within 5 of each other; 370 is the default
            int maqX = 370;
            for (Position mi : positions(positionListMap, "密")) {
                float x1 = mi.getX();
                for (Position ma : positions(positionListMap, "码")) {
                    float x2 = ma.getX();
                    if (Math.abs(x1 - x2) < 5) {
                        for (Position qu : positions(positionListMap, "区")) {
                            float x3 = qu.getX();
                            if (Math.abs(x2 - x3) < 5) {
                                maqX = Math.round((x1 + x2 + x3) / 3);
                            }
                        }
                    }
                }
            }
            {
                int x = Math.round(model.getX()) - 13;
                int y = Math.round(taxRate.getY()) + 5; // the y coordinate of the tax-rate label is the reference
                int h = Math.round(amount.getY()) - Math.round(taxRate.getY()) - 25; // y of the total label minus y of the tax-rate label
                detailStripper.addRegion("detail", new Rectangle(0, y, pageWidth, h));
                stripper.addRegion("detailName", new Rectangle(0, y, x, h));
                stripper.addRegion("detailPrice", new Rectangle(x, y, pageWidth, h));
            }
            {
                int x = maqX + 10;
                int y = Math.round(machineNumber.getY()) + 10;
                int w = pageWidth - maqX - 10;
                int h = Math.round(taxRate.getY() - 5) - y;
                stripper.addRegion("password", new Rectangle(x, y, w, h));
            }
            {
                int x = Math.round(buyer.getX()) - 15; // x of the account label is the reference
                int y = Math.round(machineNumber.getY()) + 10; // y of the machine number is the reference
                int w = maqX - x - 5; // x of the password area is the reference
                int h = Math.round(buyer.getY()) - y + 20; // y of the account label is the reference
                stripper.addRegion("buyer", new Rectangle(x, y, w, h));
            }
            {
                int x = Math.round(seller.getX()) - 15; // x of the account label is the reference
                int y = Math.round(totalAmount.getY()) + 10; // y of the tax-inclusive total is the reference
                int w = maqX - x - 5; // x of the password area is the reference
                int h = Math.round(seller.getY()) - y + 20; // y of the account label is the reference
                stripper.addRegion("seller", new Rectangle(x, y, w, h));
            }
        }
        stripper.extractRegions(firstPage);
        detailStripper.extractRegions(firstPage);
    }
    invoice.setPassword(StringUtils.trim(stripper.getTextForRegion("password")));
    String reg = "名称:(?<name>\\S*)|纳税人识别号:(?<code>\\S*)|地址、电话:(?<address>\\S*)|开户行及账号:(?<account>\\S*)|电子支付标识:(?<account2>\\S*)";
    Pattern partyPattern = Pattern.compile(reg);
    {
        String buyer = replace(stripper.getTextForRegion("buyer"));
        Matcher matcher = partyPattern.matcher(buyer);
        while (matcher.find()) {
            if (matcher.group("name") != null) {
                invoice.setBuyerName(matcher.group("name"));
            } else if (matcher.group("code") != null) {
                invoice.setBuyerCode(matcher.group("code"));
            } else if (matcher.group("address") != null) {
                invoice.setBuyerAddress(matcher.group("address"));
            } else if (matcher.group("account") != null) {
                invoice.setBuyerAccount(matcher.group("account"));
            } else if (matcher.group("account2") != null) {
                invoice.setBuyerAccount(matcher.group("account2"));
            }
        }
    }
    {
        String seller = replace(stripper.getTextForRegion("seller"));
        Matcher matcher = partyPattern.matcher(seller);
        while (matcher.find()) {
            if (matcher.group("name") != null) {
                invoice.setSellerName(matcher.group("name"));
            } else if (matcher.group("code") != null) {
                invoice.setSellerCode(matcher.group("code"));
            } else if (matcher.group("address") != null) {
                invoice.setSellerAddress(matcher.group("address"));
            } else if (matcher.group("account") != null) {
                invoice.setSellerAccount(matcher.group("account"));
            }
        }
    }
    {
        List<String> skipList = new ArrayList<>();
        List<Detail> detailList = new ArrayList<>();
        // ideographic (U+3000) and no-break (U+00A0) spaces become plain spaces before splitting
        String[] detailPriceStringArray = stripper.getTextForRegion("detailPrice").replace('\u3000', ' ').replace('\u00A0', ' ')
                .replaceAll("\r", "").split("\\n");
        for (String detailString : detailPriceStringArray) {
            Detail detail = new Detail();
            detail.setName("");
            String[] itemArray = StringUtils.split(detailString, " ");
            if (2 == itemArray.length) {
                detail.setAmount(new BigDecimal(itemArray[0]));
                detail.setTaxAmount(new BigDecimal(itemArray[1]));
                detailList.add(detail);
            } else if (2 < itemArray.length) {
                // the last three columns are amount, tax rate and tax amount
                detail.setAmount(new BigDecimal(itemArray[itemArray.length - 3]));
                String taxRate = itemArray[itemArray.length - 2];
                if (taxRate.indexOf("免税") > 0 || taxRate.indexOf("不征税") > 0 || taxRate.indexOf("出口零税率") > 0
                        || taxRate.indexOf("普通零税率") > 0 || taxRate.indexOf("%") < 0) {
                    detail.setTaxRate(new BigDecimal(0));
                    detail.setTaxAmount(new BigDecimal(0));
                } else {
                    // parsed as a decimal so that fractional rates such as 1.5% are accepted too
                    BigDecimal rate = new BigDecimal(taxRate.replaceAll("%", ""));
                    detail.setTaxRate(rate.divide(new BigDecimal(100)));
                    detail.setTaxAmount(new BigDecimal(itemArray[itemArray.length - 1]));
                }
                for (int j = 0; j < itemArray.length - 3; j++) {
                    if (itemArray[j].matches("^(-?\\d+)(\\.\\d+)?$")) {
                        // first number is the quantity, any later number the unit price
                        if (null == detail.getCount()) {
                            detail.setCount(new BigDecimal(itemArray[j]));
                        } else {
                            detail.setPrice(new BigDecimal(itemArray[j]));
                        }
                    } else {
                        if (itemArray.length > j + 1 && !itemArray[j + 1].matches("^(-?\\d+)(\\.\\d+)?$")) {
                            // two consecutive non-numeric columns: model followed by unit
                            detail.setUnit(itemArray[j + 1]);
                            detail.setModel(itemArray[j]);
                            j++;
                        } else if (itemArray[j].length() > 2) {
                            detail.setModel(itemArray[j]);
                        } else {
                            detail.setUnit(itemArray[j]);
                        }
                    }
                }
                detailList.add(detail);
            } else {
                // rows with fewer than two columns are kept for the name/model merging below
                skipList.add(detailString);
            }
        }
        String[] detailNameStringArray = stripper.getTextForRegion("detailName").replace('\u3000', ' ').replace('\u00A0', ' ')
                .replaceAll("\r", "").split("\\n");
        String[] detailStringArray = replace(detailStripper.getTextForRegion("detail")).replaceAll("\r", "").split("\\n");
        int i = 0, j = 0, h = 0, m = 0;
        Detail lastDetail = null;
        for (String detailString : detailStringArray) {
            if (m < detailNameStringArray.length) {
                if ((detailString.matches("\\S+\\d*(%|免税|不征税|出口零税率|普通零税率)\\S*")
                        && !detailString.matches("^ *\\d*(%|免税|不征税|出口零税率|普通零税率)\\S*")
                        && detailString.matches("\\S+\\d+%[\\-\\d]+\\S*"))
                        || (detailStringArray.length > i + 1
                                && detailStringArray[i + 1].matches("^ *\\d*(%|免税|不征税|出口零税率|普通零税率)\\S*"))) {
                    // this line starts a new detail row: attach the name to the next parsed detail
                    if (j < detailList.size()) {
                        lastDetail = detailList.get(j);
                        lastDetail.setName(detailNameStringArray[m]);
                    }
                    j++;
                } else if (null != lastDetail && StringUtils.isNotBlank(detailNameStringArray[m])) {
                    // continuation line: append it to the name and/or model of the previous detail
                    if (skipList.size() > h) {
                        String skip = skipList.get(h);
                        if (detailString.endsWith(skip)) {
                            if (detailString.equals(skip)) {
                                m--;
                            } else {
                                lastDetail.setName(lastDetail.getName() + detailNameStringArray[m]);
                            }
                            // a missing model must not be concatenated as the text "null"
                            String model = lastDetail.getModel();
                            lastDetail.setModel((null == model ? "" : model) + skip);
                            h++;
                        } else {
                            lastDetail.setName(lastDetail.getName() + detailNameStringArray[m]);
                        }
                    } else {
                        lastDetail.setName(lastDetail.getName() + detailNameStringArray[m]);
                    }
                }
            }
            i++;
            m++;
        }
        invoice.setDetailList(detailList);
    }
    return invoice;
}

/** Returns the positions recorded for a keyword, or an empty list when there is no entry for it. */
private static List<Position> positions(Map<String, List<Position>> positionListMap, String keyword) {
    List<Position> list = positionListMap.get(keyword);
    return null == list ? new ArrayList<>() : list;
}

/** Returns the first position recorded for a keyword; fails with an IOException when it was not found. */
private static Position firstPosition(Map<String, List<Position>> positionListMap, String keyword) throws IOException {
    List<Position> list = positionListMap.get(keyword);
    if (null == list || list.isEmpty()) {
        throw new IOException("keyword not found in PDF: " + keyword);
    }
    return list.get(0);
}
/**
 * Normalises text extracted from a PDF before it is matched against the label patterns: removes
 * plain spaces, ideographic spaces (U+3000) and no-break spaces (U+00A0), and turns the full-width
 * colon (U+FF1A) into an ASCII colon. Line breaks and tabs are kept.
 *
 * @param str
 *            the extracted text, must not be null
 * @return the normalised text
 */
public static String replace(String str) {
    // Unicode escapes are used so the special characters survive copy/paste and re-encoding
    return str.replace(" ", "").replace("\u3000", "").replace('\uFF1A', ':').replace("\u00A0", "");
}
这块里面的内容请各位老铁自行封装(注意:上面贴出的 PdfInvoiceExtractor 代码缺少类结尾的 `}`,复制时请自行补上)。
OfdInvoiceExtractor类
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.springframework.util.StreamUtils;
/**
 * Extracts the fields of an electronic invoice stored in OFD format.
 * <p>
 * The OFD file is opened as a ZIP archive. Most values come from the attached
 * {@code original_invoice.xml}; the title and the amount in words are cut out of the text of the
 * first page ({@code Content.xml}).
 */
public class OfdInvoiceExtractor {
    private static final Charset UTF8 = Charset.forName("utf-8");

    /**
     * Parses the given OFD file.
     *
     * @param file
     *            the OFD file
     * @return the populated invoice; optional values that are missing stay null, missing tax
     *         amounts and tax rates become zero
     * @throws IOException
     *             if the archive cannot be read or one of the two expected entries is missing
     * @throws DocumentException
     *             if the invoice XML cannot be parsed
     */
    public static Invoice extract(File file) throws IOException, DocumentException {
        String body;
        String content;
        // the archive and both entry streams are closed even when reading fails
        try (ZipFile zipFile = new ZipFile(file)) {
            body = readEntry(zipFile, "Doc_0/Attachs/original_invoice.xml");
            content = readEntry(zipFile, "Doc_0/Pages/Page_0/Content.xml");
        }
        // NOTE(review): body comes from a user-supplied file - confirm that the dom4j version in
        // use does not resolve DTDs / external entities when parsing text.
        Document document = DocumentHelper.parseText(body);
        Element root = document.getRootElement();
        Invoice invoice = new Invoice();
        invoice.setMachineNumber(root.elementTextTrim("MachineNo"));
        invoice.setCode(root.elementTextTrim("InvoiceCode"));
        invoice.setNumber(root.elementTextTrim("InvoiceNo"));
        invoice.setDate(root.elementTextTrim("IssueDate"));
        invoice.setChecksum(root.elementTextTrim("InvoiceCheckCode"));
        invoice.setAmount(toDecimal(root.elementTextTrim("TaxExclusiveTotalAmount"), null));
        invoice.setTaxAmount(toDecimal(root.elementTextTrim("TaxTotalAmount"), BigDecimal.ZERO));
        // amount in words: the text node of the page content that ends with the two-character suffix
        int ind = content.indexOf("圆整</ofd:TextCode>");
        if (ind >= 0) {
            invoice.setTotalAmountString(content.substring(content.lastIndexOf(">", ind) + 1, ind + 2));
        }
        invoice.setTotalAmount(toDecimal(root.elementTextTrim("TaxInclusiveTotalAmount"), null));
        invoice.setPayee(root.elementTextTrim("Payee"));
        invoice.setReviewer(root.elementTextTrim("Checker"));
        invoice.setDrawer(root.elementTextTrim("InvoiceClerk"));
        // title: the first text node of the page content
        int index = content.indexOf("</ofd:TextCode>");
        String title = index < 0 ? null : content.substring(content.lastIndexOf(">", index) + 1, index);
        invoice.setTitle(title);
        invoice.setType("普通发票");
        if (null != title) {
            if (title.contains("专用发票")) {
                invoice.setType("专用发票");
            } else if (title.contains("通行费")) {
                invoice.setType("通行费");
            }
        }
        invoice.setPassword(root.elementText("TaxControlCode"));
        Element buyer = root.element("Buyer");
        if (null != buyer) {
            invoice.setBuyerName(buyer.elementTextTrim("BuyerName"));
            invoice.setBuyerCode(buyer.elementTextTrim("BuyerTaxID"));
            invoice.setBuyerAddress(buyer.elementTextTrim("BuyerAddrTel"));
            invoice.setBuyerAccount(buyer.elementTextTrim("BuyerFinancialAccount"));
        }
        Element seller = root.element("Seller");
        if (null != seller) {
            invoice.setSellerName(seller.elementTextTrim("SellerName"));
            invoice.setSellerCode(seller.elementTextTrim("SellerTaxID"));
            invoice.setSellerAddress(seller.elementTextTrim("SellerAddrTel"));
            invoice.setSellerAccount(seller.elementTextTrim("SellerFinancialAccount"));
        }
        List<Detail> detailList = new ArrayList<>();
        Element details = root.element("GoodsInfos");
        if (null != details) {
            List<Element> elements = details.elements();
            for (Element element : elements) {
                Detail detail = new Detail();
                detail.setName(element.elementTextTrim("Item"));
                detail.setAmount(toDecimal(element.elementTextTrim("Amount"), null));
                // tax-exempt rows carry no (numeric) tax amount or rate: treat both as zero
                detail.setTaxAmount(toDecimal(element.elementTextTrim("TaxAmount"), BigDecimal.ZERO));
                detail.setCount(toDecimal(element.elementTextTrim("Quantity"), null));
                detail.setPrice(toDecimal(element.elementTextTrim("Price"), null));
                detail.setUnit(element.elementTextTrim("MeasurementDimension"));
                detail.setModel(element.elementTextTrim("Specification"));
                detail.setTaxRate(toTaxRate(element.elementTextTrim("TaxScheme")));
                detailList.add(detail);
            }
        }
        invoice.setDetailList(detailList);
        return invoice;
    }

    /** Reads one entry of the archive as UTF-8 text; a missing entry is reported as an IOException. */
    private static String readEntry(ZipFile zipFile, String name) throws IOException {
        ZipEntry entry = zipFile.getEntry(name);
        if (null == entry) {
            throw new IOException("OFD entry not found: " + name);
        }
        try (InputStream input = zipFile.getInputStream(entry)) {
            return StreamUtils.copyToString(input, UTF8);
        }
    }

    /** Parses a decimal number; returns {@code fallback} when the text is null, empty or not numeric. */
    private static BigDecimal toDecimal(String text, BigDecimal fallback) {
        if (null == text || text.isEmpty()) {
            return fallback;
        }
        try {
            return new BigDecimal(text);
        } catch (NumberFormatException e) {
            return fallback;
        }
    }

    /** Converts a percentage such as {@code 13%} into a fraction; anything non-numeric yields zero. */
    private static BigDecimal toTaxRate(String text) {
        BigDecimal percent = toDecimal(null == text ? null : text.replace("%", ""), null);
        return null == percent ? BigDecimal.ZERO : percent.divide(new BigDecimal(100));
    }
}
请注意空指针:element.elementTextTrim("TaxAmount")
不是每个发票都有这个的,比如这个发票,直接免税,所以请注意修改
VO对象:
import java.math.BigDecimal;
import java.util.List;
/**
 * Value object holding everything recognised on one electronic invoice. All fields are optional
 * and stay {@code null} until the corresponding setter is called.
 */
public class Invoice {
    // header
    private String title;
    private String machineNumber;
    private String code;
    private String number;
    private String date;
    private String checksum;
    // buyer
    private String buyerName;
    private String buyerCode;
    private String buyerAddress;
    private String buyerAccount;
    // password area and totals
    private String password;
    private BigDecimal amount;
    private BigDecimal taxAmount;
    private String totalAmountString;
    private BigDecimal totalAmount;
    // seller
    private String sellerName;
    private String sellerCode;
    private String sellerAddress;
    private String sellerAccount;
    // signatures, type and rows
    private String payee;
    private String reviewer;
    private String drawer;
    private String type;
    private List<Detail> detailList;

    /** @return the title */
    public String getTitle() {
        return title;
    }

    /** @param title the title to set */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the machineNumber */
    public String getMachineNumber() {
        return machineNumber;
    }

    /** @param machineNumber the machineNumber to set */
    public void setMachineNumber(String machineNumber) {
        this.machineNumber = machineNumber;
    }

    /** @return the code */
    public String getCode() {
        return code;
    }

    /** @param code the code to set */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the number */
    public String getNumber() {
        return number;
    }

    /** @param number the number to set */
    public void setNumber(String number) {
        this.number = number;
    }

    /** @return the date */
    public String getDate() {
        return date;
    }

    /** @param date the date to set */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return the checksum */
    public String getChecksum() {
        return checksum;
    }

    /** @param checksum the checksum to set */
    public void setChecksum(String checksum) {
        this.checksum = checksum;
    }

    /** @return the buyerName */
    public String getBuyerName() {
        return buyerName;
    }

    /** @param buyerName the buyerName to set */
    public void setBuyerName(String buyerName) {
        this.buyerName = buyerName;
    }

    /** @return the buyerCode */
    public String getBuyerCode() {
        return buyerCode;
    }

    /** @param buyerCode the buyerCode to set */
    public void setBuyerCode(String buyerCode) {
        this.buyerCode = buyerCode;
    }

    /** @return the buyerAddress */
    public String getBuyerAddress() {
        return buyerAddress;
    }

    /** @param buyerAddress the buyerAddress to set */
    public void setBuyerAddress(String buyerAddress) {
        this.buyerAddress = buyerAddress;
    }

    /** @return the buyerAccount */
    public String getBuyerAccount() {
        return buyerAccount;
    }

    /** @param buyerAccount the buyerAccount to set */
    public void setBuyerAccount(String buyerAccount) {
        this.buyerAccount = buyerAccount;
    }

    /** @return the password */
    public String getPassword() {
        return password;
    }

    /** @param password the password to set */
    public void setPassword(String password) {
        this.password = password;
    }

    /** @return the amount */
    public BigDecimal getAmount() {
        return amount;
    }

    /** @param amount the amount to set */
    public void setAmount(BigDecimal amount) {
        this.amount = amount;
    }

    /** @return the taxAmount */
    public BigDecimal getTaxAmount() {
        return taxAmount;
    }

    /** @param taxAmount the taxAmount to set */
    public void setTaxAmount(BigDecimal taxAmount) {
        this.taxAmount = taxAmount;
    }

    /** @return the totalAmountString */
    public String getTotalAmountString() {
        return totalAmountString;
    }

    /** @param totalAmountString the totalAmountString to set */
    public void setTotalAmountString(String totalAmountString) {
        this.totalAmountString = totalAmountString;
    }

    /** @return the totalAmount */
    public BigDecimal getTotalAmount() {
        return totalAmount;
    }

    /** @param totalAmount the totalAmount to set */
    public void setTotalAmount(BigDecimal totalAmount) {
        this.totalAmount = totalAmount;
    }

    /** @return the sellerName */
    public String getSellerName() {
        return sellerName;
    }

    /** @param sellerName the sellerName to set */
    public void setSellerName(String sellerName) {
        this.sellerName = sellerName;
    }

    /** @return the sellerCode */
    public String getSellerCode() {
        return sellerCode;
    }

    /** @param sellerCode the sellerCode to set */
    public void setSellerCode(String sellerCode) {
        this.sellerCode = sellerCode;
    }

    /** @return the sellerAddress */
    public String getSellerAddress() {
        return sellerAddress;
    }

    /** @param sellerAddress the sellerAddress to set */
    public void setSellerAddress(String sellerAddress) {
        this.sellerAddress = sellerAddress;
    }

    /** @return the sellerAccount */
    public String getSellerAccount() {
        return sellerAccount;
    }

    /** @param sellerAccount the sellerAccount to set */
    public void setSellerAccount(String sellerAccount) {
        this.sellerAccount = sellerAccount;
    }

    /** @return the payee */
    public String getPayee() {
        return payee;
    }

    /** @param payee the payee to set */
    public void setPayee(String payee) {
        this.payee = payee;
    }

    /** @return the reviewer */
    public String getReviewer() {
        return reviewer;
    }

    /** @param reviewer the reviewer to set */
    public void setReviewer(String reviewer) {
        this.reviewer = reviewer;
    }

    /** @return the drawer */
    public String getDrawer() {
        return drawer;
    }

    /** @param drawer the drawer to set */
    public void setDrawer(String drawer) {
        this.drawer = drawer;
    }

    /** @return the type */
    public String getType() {
        return type;
    }

    /** @param type the type to set */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the detailList */
    public List<Detail> getDetailList() {
        return detailList;
    }

    /** @param detailList the detailList to set */
    public void setDetailList(List<Detail> detailList) {
        this.detailList = detailList;
    }

    /** Lists every field as {@code name=value} in declaration order. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Invoice [");
        text.append("title=").append(title);
        text.append(", machineNumber=").append(machineNumber);
        text.append(", code=").append(code);
        text.append(", number=").append(number);
        text.append(", date=").append(date);
        text.append(", checksum=").append(checksum);
        text.append(", buyerName=").append(buyerName);
        text.append(", buyerCode=").append(buyerCode);
        text.append(", buyerAddress=").append(buyerAddress);
        text.append(", buyerAccount=").append(buyerAccount);
        text.append(", password=").append(password);
        text.append(", amount=").append(amount);
        text.append(", taxAmount=").append(taxAmount);
        text.append(", totalAmountString=").append(totalAmountString);
        text.append(", totalAmount=").append(totalAmount);
        text.append(", sellerName=").append(sellerName);
        text.append(", sellerCode=").append(sellerCode);
        text.append(", sellerAddress=").append(sellerAddress);
        text.append(", sellerAccount=").append(sellerAccount);
        text.append(", payee=").append(payee);
        text.append(", reviewer=").append(reviewer);
        text.append(", drawer=").append(drawer);
        text.append(", type=").append(type);
        text.append(", detailList=").append(detailList);
        return text.append("]").toString();
    }
}
/**
 * One row of an invoice: the item with its model, unit, quantity, unit price, amount, tax rate
 * and tax amount. Every field stays {@code null} until its setter is called.
 */
class Detail {
    private String name;
    private String model;
    private String unit;
    private BigDecimal count;
    private BigDecimal price;
    private BigDecimal amount;
    private BigDecimal taxRate;
    private BigDecimal taxAmount;

    /** @return the name */
    public String getName() {
        return name;
    }

    /** @param name the name to set */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the model */
    public String getModel() {
        return model;
    }

    /** @param model the model to set */
    public void setModel(String model) {
        this.model = model;
    }

    /** @return the unit */
    public String getUnit() {
        return unit;
    }

    /** @param unit the unit to set */
    public void setUnit(String unit) {
        this.unit = unit;
    }

    /** @return the count */
    public BigDecimal getCount() {
        return count;
    }

    /** @param count the count to set */
    public void setCount(BigDecimal count) {
        this.count = count;
    }

    /** @return the price */
    public BigDecimal getPrice() {
        return price;
    }

    /** @param price the price to set */
    public void setPrice(BigDecimal price) {
        this.price = price;
    }

    /** @return the amount */
    public BigDecimal getAmount() {
        return amount;
    }

    /** @param amount the amount to set */
    public void setAmount(BigDecimal amount) {
        this.amount = amount;
    }

    /** @return the taxRate */
    public BigDecimal getTaxRate() {
        return taxRate;
    }

    /** @param taxRate the taxRate to set */
    public void setTaxRate(BigDecimal taxRate) {
        this.taxRate = taxRate;
    }

    /** @return the taxAmount */
    public BigDecimal getTaxAmount() {
        return taxAmount;
    }

    /** @param taxAmount the taxAmount to set */
    public void setTaxAmount(BigDecimal taxAmount) {
        this.taxAmount = taxAmount;
    }

    /** Lists every field as {@code name=value} in declaration order. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Detail [");
        text.append("name=").append(name);
        text.append(", model=").append(model);
        text.append(", unit=").append(unit);
        text.append(", count=").append(count);
        text.append(", price=").append(price);
        text.append(", amount=").append(amount);
        text.append(", taxRate=").append(taxRate);
        text.append(", taxAmount=").append(taxAmount);
        return text.append("]").toString();
    }
}
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
/**
 * A {@link PDFTextStripper} that, instead of producing text, records the page coordinates at which
 * a given list of keywords occurs.
 */
public class PDFKeyWordPosition extends PDFTextStripper {
// keywords to look for during the current getCoordinate call
private List<String> keywordList;
// result of the current getCoordinate call: keyword -> positions recorded for it
private Map<String, List<Position>> positionListMap;
public PDFKeyWordPosition() throws IOException {
super();
}
/**
 * Gets the coordinate information of the given keywords.
 * <p>
 * Only page 1 is processed (start and end page are both set to 1) and text is sorted by position.
 * The text is written to a throw-away writer; the positions are collected by
 * {@link #writeString(String, List)} along the way.
 *
 * @param keywordList
 *            the keywords to locate
 * @param document
 *            the PDF document to scan
 * @return map from keyword to the positions recorded for it; an entry is created for every
 *         keyword each time writeString runs, so a keyword without hits maps to an empty list
 *         (NOTE(review): the map stays empty if writeString is never invoked - confirm callers
 *         cope with a missing key)
 * @throws IOException
 *             if writing the document text fails
 */
public Map<String, List<Position>> getCoordinate(List<String> keywordList, PDDocument document) throws IOException {
super.setSortByPosition(true);
this.keywordList = keywordList;
this.positionListMap = new HashMap<>();
super.setStartPage(1);
super.setEndPage(1);
// the produced text itself is not needed, only the writeString callbacks
Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
super.writeText(document, dummy);
return positionListMap;
}
/**
 * Scans the glyphs of one text run for every keyword and records the x/y of the glyph at which
 * a keyword is considered found. Nothing is written to the output.
 */
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
for (String keyword : keywordList) {
// index of the keyword character the next glyph is compared with; starts at 0 for each
// keyword and each text run
Integer foundIndex = 0;
List<Position> positionList = positionListMap.computeIfAbsent(keyword, k -> new ArrayList<>());
for (int i = 0; i < textPositions.size(); i++) {
TextPosition textPosition = textPositions.get(i);
String str = textPosition.getUnicode();
// only the first character of the glyph's unicode text is compared
if (0 < str.length() && str.charAt(0) == keyword.charAt(foundIndex)) {
foundIndex++;
// count = keyword characters matched so far, including the current glyph
int count = foundIndex;
// compare the glyphs at i + j with the remaining keyword characters; a mismatch does not
// stop the loop, it just leaves count short of the keyword length
for (int j = foundIndex; j < keyword.length(); j++) {
if (i + j >= textPositions.size()) {
break;
} else {
String s = textPositions.get(i + j).getUnicode();
if (0 < s.length() && s.charAt(0) == keyword.charAt(j)) {
count++;
}
}
}
if (count == keyword.length()) {
// whole keyword accounted for: record the coordinates of the current glyph
foundIndex = 0;
Position position = new Position();
position.setX(textPosition.getX());
position.setY(textPosition.getY());
positionList.add(position);
positionListMap.put(keyword, positionList);
}
// NOTE(review): when count falls short, foundIndex is NOT reset, so later glyphs are compared
// with a later keyword character. This appears to let a keyword whose characters are separated
// by other glyphs still be recorded (at the position of the later character) - confirm whether
// that is intended before changing this loop.
}
}
}
}
}
/** A mutable x/y coordinate pair. */
class Position {
    float x;
    float y;

    /** Creates a position with both coordinates left at 0. */
    public Position() {
    }

    /** Creates a position with the given coordinates. */
    public Position(float x, float y) {
        this.x = x;
        this.y = y;
    }

    /** @return the x */
    public float getX() {
        return x;
    }

    /** @param x the x to set */
    public void setX(float x) {
        this.x = x;
    }

    /** @return the y */
    public float getY() {
        return y;
    }

    /** @param y the y to set */
    public void setY(float y) {
        this.y = y;
    }

    /** Renders both coordinates as {@code Position [x=..., y=...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Position [");
        text.append("x=").append(x);
        text.append(", y=").append(y);
        return text.append("]").toString();
    }
}
配置文件:
基本上就这样了,有兴趣的同学请参考使用