最近遇到这样一个业务,需要从txt文件中读取几十万条数据,并且每条数据是存在父子关系的,需要每条数据拆分成个体然后存入数据库,必须保证不重复存入。数据结构如下:
1111111111,2222222221,3333333331,44444444441
1111111111,2222222221,3333333332,44444444442
1111111111,2222222221,3333333331,44444444443
1111111111,2222222221,3333333334,44444444444
1111111111,2222222222,3333333335,44444444445
1111111111,2222222222,3333333336,44444444446
1111111111,2222222222,3333333337,44444444447
1111111111,2222222222,3333333338,44444444448
......
问题:如果是一个文件一条一条数据处理的话,处理过程一旦中断,下次继续还需要从头开始。
方案:将文件导入系统的过程中直接根据指定数据数量将其拆分为若干小文件,每个文件中存放指定数目的数据量。然后将若干文件分组,限制100个线程分别读取其中的数据处理后入库。
实现步骤:
1.将txt文件导入系统;
2.将导入系统的txt文件中数据读取并入库。
实现代码如下:
1.将txt文件导入系统。
@SuppressWarnings("unchecked")
@RequestMapping(params = "importExcel", method = RequestMethod.POST)
@ResponseBody
public AjaxJson importExcel(HttpServletRequest request, HttpServletResponse response) throws Exception, IOException {
AjaxJson j = new AjaxJson();
MultipartHttpServletRequest multipartRequest = (MultipartHttpServletRequest) request;
Map<String, MultipartFile> fileMap = multipartRequest.getFileMap();
for (Map.Entry<String, MultipartFile> entity : fileMap.entrySet()) {
MultipartFile file = entity.getValue();// 获取上传文件对象
String path = request.getSession().getServletContext().getRealPath("/")+"shuilangyizu/";
String copyPath = request.getSession().getServletContext().getRealPath("/")+"shuilangyizuCopy/";
File filedir = new File(path);
if(!filedir.exists()){
filedir.mkdirs();
}
path = path + UUID.randomUUID();
File dest = new File(path);
file.transferTo(dest);
List<String> strList = FileUtil.getTxt(path);
int i = 0;
List<String> codeList = new ArrayList<String>();
for (String string : strList) {
i = i + 1;
codeList.add(string);
if (i==20) {// 每个文件中只存放20条数据,将多数据的txt文件拆分为若干个存放20条数据的txt文件
FileUtil.splitFile(copyPath, codeList);
i = 0;
codeList = new ArrayList<String>();
}
}
if (i!=0) {
FileUtil.splitFile(copyPath, codeList);
i = 0;
codeList = new ArrayList<String>();
}
dest.delete();
}
FinalData.isDealCodeUpc = 0;
return j;
}
2.将导入系统的txt文件中数据读取并入库。
调用方法:
FileUtil.savesDataCode();
(1)文件操作工具类
package com.shuilangyizu.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import javax.servlet.http.HttpServletRequest;
import org.jeecgframework.core.util.ContextHolderUtils;
import org.jeecgframework.web.system.service.SystemService;
import com.jeecg.scan.entity.CodeEntity;
import me.chanjar.weixin.common.util.StringUtils;
/**
* 文件操作工具类
*
* @author liuf
*
*/
public class FileUtil {/**
* 存储数据的方法
*/
public static void savesDataCode() {
HttpServletRequest request = ContextHolderUtils.getRequest();
String path = request.getSession().getServletContext().getRealPath("/")+"shuilangyizuCopy/";
File file = new File(path);
if(!file.exists()){
file.mkdirs();
}
File files = new File(path);
File[] tempList = files.listFiles();
int sum = tempList.length/100;
List<File> list = new ArrayList<File>();
int i = 0;
int j = 0;
for (File tempfile : tempList) {
i = i + 1;
list.add(tempfile);
if (i==sum) {
j = j + 1;
MoreThreadUtil mr = new MoreThreadUtil(list,"Thread:"+j);
mr.start();
i = 0;
list = new ArrayList<File>();
}
}
if (i!=0) {
j = j + 1;
MoreThreadUtil mr = new MoreThreadUtil(list,"Thread:"+j);
mr.start();
i = 0;
list = new ArrayList<File>();
}
}
/**
* 分组处理数据
* @param tempList
*/
public static void forLoopFiles(List<File> fileList) {
try {
SystemService systemService = (SystemService) SpringTool.getBean("systemService");
for (int k = 0; k < fileList.size(); k++) {
File codeFile = fileList.get(k);
String pathurl = codeFile.getPath();
List<String> list = getTxt(pathurl);
for (String str : list) {
str = str.replaceAll(",", ",");
String[] strs = str.split(",");
int j = 0;
//数据排列
String codedatas = "";
//下一条数据的父ID
String fid = "";
for (int i = 0; i < strs.length; i++) {
//当前数据
String cruCodeStr = strs[i];
String cruStr = "";
if (cruCodeStr.indexOf("?")!=-1) {
String[] codestrs = cruCodeStr.split("\\?");
if (codestrs.length>1) {
cruStr = codestrs[1];
}else{
cruStr = codestrs[0];
}
}else{
cruStr = cruCodeStr;
}
//如果当前数据和上一条数据相等则flag=true;不想等则flag=false。
boolean flag = false;
if (i>0) {
//上一组数据
String beforeCodeStr = strs[i-1];
String beforeStr = "";
if (beforeCodeStr.indexOf("?")!=-1) {
String[] codestrs = beforeCodeStr.split("\\?");
if (codestrs.length>1) {
beforeStr = codestrs[1];
}else{
beforeStr = codestrs[0];
}
}else{
beforeStr = beforeCodeStr;
}
if (cruStr.equals(beforeStr)) {
continue;
}else{
flag = true;
}
}else{
flag = true;
}
if (flag) {
j = j + 1;
List<Object[]> codeList = systemService.findByQueryString(" select id,codedatas from CodeEntity where codelevel = "+j+" and codedata = '"+cruStr+"' ");
codeList = codeList == null?new ArrayList<Object[]>():codeList;
if (codeList.size()>0) {
Object[] sData = codeList.get(0);
codedatas = sData[1]==null?"":sData[1].toString();
fid = sData[0]==null?"":sData[0].toString();
}else{
CodeEntity sData = new CodeEntity();
sData.setFid(fid);
sData.setCodedata(cruStr);
sData.setCodedatas(codedatas);
sData.setCodelevel(j);
sData.setCodestatus("1");
String codetype = "";
if (cruStr.length()==10) {
//10位数据
codetype = "1";
}else if(cruStr.length()==11){
//11位数据
codetype = "2";
}else{
//其他
codetype = "3";
}
sData.setCodetype(codetype);
sData.setCreateDate(new Date());
systemService.saveOrUpdate(sData);
sData.setCodedatas(sData.getCodedatas()+sData.getId());
systemService.saveOrUpdate(sData);
fid = sData.getId();
codedatas = sData.getCodedatas();
}
}
}
}
codeFile.delete();
}
} catch (Exception e) {
// TODO: handle exception
}
}
/**
* 检测网络资源是否存在
*
* @param strUrl
* @return
*/
public static boolean isNetFileAvailable(String strUrl) {
InputStream netFileInputStream = null;
try {
URL url = new URL(strUrl);
URLConnection urlConn = url.openConnection();
netFileInputStream = urlConn.getInputStream();
if (null != netFileInputStream) {
return true;
} else {
return false;
}
} catch (IOException e) {
return false;
} finally {
try {
if (netFileInputStream != null){
netFileInputStream.close();
}
} catch (IOException e) {
}
}
}
/**
* 读取本地txt文件,将其转化为一个字符串集合
* @return
*/
public static List<String> getTxt(String filepath){
try{
String temp = null;
File f = new File(filepath);
//指定读取编码用于读取中文
InputStreamReader read = new InputStreamReader(new FileInputStream(f),"GBK");
List<String> readList = new ArrayList<String>();
BufferedReader reader=new BufferedReader(read);
while((temp=reader.readLine())!=null &&!"".equals(temp)){
readList.add(temp);
}
read.close();
return readList;
}catch (Exception e) {
e.printStackTrace();
return null;
}
}
/**
* 向txt中写入数据
* @param copyPath
* @param codeList
* @return true写成功 false写入失败
*/
public static boolean splitFile(String copyPath, List<String> codeList) {
boolean flag = false;
File copyfile = new File(copyPath);
if(!copyfile.exists()){
copyfile.mkdirs();
}
File copyFiles = new File(copyPath+UUID.randomUUID());
FileWriter fw = null;
try {
if (!copyFiles.exists()) {
copyFiles.createNewFile();
}
fw = new FileWriter(copyFiles);
for (String code : codeList) {
fw.write(code+"\r\n");//向文件中写内容
}
flag = true;
} catch (IOException e) {
e.printStackTrace();
}finally{
if(fw != null){
try {
fw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return flag;
}
}
(2)多线程工具类
package com.shuilangyizu.util;
import java.io.File;
import java.util.List;
public class MoreThreadUtil implements Runnable {
private Thread t;
private String threadName;
private List<File> files;
MoreThreadUtil(List<File> fileList,String name){
files = fileList;
threadName = name;
}
public void run() {
try {
FileUtil.forLoopFiles(files);
}catch (Exception e) {
e.printStackTrace();
}
}
public void start () {
if (t == null) {
t = new Thread(this, threadName);
t.start();
}
}
}