开始写一些小东西来方便自己的生活
前几日和leader讨论了一下记单词的方法(别问我程序员为啥要记单词),词根法比较好。以前也见过一些既用词根又用别的方法记单词的,但是比较难。leader说他发现只用词根的方法也是可以的,只要量足够大,就可以发现其中的规律。遂前往下载单词词库和词根表。使用简单的字符串匹配算法,统计了每种词根在单词中出现的次数。然后调用百度翻译接口(本来打算调用谷歌翻译,但由于谷歌的限制措施,所以宣告失败),对词库中的单词进行翻译。以上已经写完,不过功能尚未成型,后续功能我还要仔细想想。
上面是项目结构
模块一 从比较凌乱的词库中,将单词取出来,做成特定格式放到新的文件中
package com.chaojilaji.Util;
import java.io.*;
import java.util.Arrays;
/**
 * File-based helpers that turn a raw word list and a raw root table into
 * space-separated token files, and report which words contain which root.
 *
 * <p>Every method takes a path <em>without</em> the {@code .txt} extension and
 * derives its input and output file names from it. Files are read and written
 * with the platform default charset.
 *
 * @author chaoj
 * @version 1.0
 */
public class WrodsFromFiles {

    /**
     * Extracts the words of {@code <filenmame>.txt}, sorts them, and writes them
     * separated by single spaces to {@code <filenmame>new.txt}.
     *
     * <p>Double quotes and tab characters are removed from the input. Words are
     * delimited by spaces and by line breaks. The number of words written is
     * printed to standard output.
     *
     * @param filenmame path of the word list without the {@code .txt} extension
     * @return the path of the generated file, or {@code null} if the input file
     *         does not exist
     * @throws IOException if reading or writing fails
     */
    public static String changeFile(String filenmame) throws IOException {
        String target = filenmame + "new.txt";
        File input = new File(filenmame + ".txt");
        if (!input.exists()) {
            System.out.println("文件不存在");
            return null;
        }

        StringBuilder raw = new StringBuilder();
        try (BufferedReader reader = openReader(input)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // A line break ends a word, so keep a separator between lines.
                raw.append(line).append(' ');
            }
        }

        String cleaned = raw.toString().replace("\"", "").replace("\t", "");
        String[] tokens = cleaned.split(" ");
        Arrays.sort(tokens);

        StringBuilder joined = new StringBuilder();
        int count = 0;
        for (String token : tokens) {
            if (token.isEmpty()) {
                // Runs of spaces produce empty tokens; they are not words.
                continue;
            }
            if (count > 0) {
                joined.append(' ');
            }
            joined.append(token);
            count++;
        }
        System.out.println(count);

        writeText(new File(target), joined.toString());
        return target;
    }

    /**
     * Converts the root table {@code <filename>.txt} into space-separated
     * {@code root:meaning} entries written to {@code <filename>_new.txt}.
     *
     * <p>Only the part of each line after the first {@code |} is used. The roots
     * are a comma-separated list that ends at the first {@code =} after the pipe;
     * when there is no {@code =}, the list ends at the first character that is
     * neither an ASCII letter nor a comma, and the meaning starts at that
     * character. Spaces inside roots and meanings are removed because a space is
     * the entry separator. Lines without a {@code |}, or without any root after
     * it, contribute nothing.
     *
     * @param filename path of the root table without the {@code .txt} extension
     * @return the path of the generated file, or {@code null} if the input file
     *         does not exist
     * @throws IOException if reading or writing fails
     */
    public static String changeRootFiles(String filename) throws IOException {
        String target = filename + "_new.txt";
        File input = new File(filename + ".txt");
        if (!input.exists()) {
            System.out.println("文件不存在");
            return null;
        }

        StringBuilder entries = new StringBuilder();
        try (BufferedReader reader = openReader(input)) {
            String line;
            while ((line = reader.readLine()) != null) {
                appendRootEntries(line, entries);
            }
        }

        writeText(new File(target), entries.toString());
        return target;
    }

    /**
     * Writes, for every {@code root:meaning} entry of {@code <roots>.txt}, the
     * words of {@code <words>.txt} that contain the root.
     *
     * <p>Both inputs are expected to hold space-separated tokens. The report goes
     * to {@code <words>_result.txt}; each entry has the form
     * {@code meaning\nroot: word1, word2, \n\n}. Entries without a colon or with
     * an empty root are skipped, since an empty root would match every word.
     * If either input file is missing, a message is printed and nothing is written.
     *
     * @param words path of the word file without the {@code .txt} extension
     * @param roots path of the root file without the {@code .txt} extension
     * @throws IOException if reading or writing fails
     */
    public static void tongJi(String words, String roots) throws IOException {
        File wordFile = new File(words + ".txt");
        File rootFile = new File(roots + ".txt");
        if (!wordFile.exists() || !rootFile.exists()) {
            System.out.println("文件不存在");
            return;
        }

        String[] wordTokens = readTokens(wordFile);
        String[] rootTokens = readTokens(rootFile);

        StringBuilder report = new StringBuilder();
        for (String entry : rootTokens) {
            int colon = entry.indexOf(':');
            if (colon <= 0) {
                continue;
            }
            String root = entry.substring(0, colon);
            // Everything after the colon is the meaning, including its last character.
            String meaning = entry.substring(colon + 1);

            report.append(meaning).append('\n').append(root).append(": ");
            for (String word : wordTokens) {
                if (word.contains(root)) {
                    report.append(word).append(", ");
                }
            }
            report.append("\n\n");
        }

        writeText(new File(words + "_result.txt"), report.toString());
    }

    /** Appends the {@code root:meaning} entries found on one root-table line. */
    private static void appendRootEntries(String line, StringBuilder out) {
        int pipe = line.indexOf('|');
        if (pipe == -1 || pipe == line.length() - 1) {
            return;
        }
        int rootStart = pipe + 1;
        int rootEnd;
        int meaningStart;

        // Only an '=' after the pipe can terminate the root list.
        int equalsSign = line.indexOf('=', rootStart);
        if (equalsSign != -1) {
            rootEnd = equalsSign;
            meaningStart = equalsSign + 1;
        } else {
            rootEnd = line.length();
            for (int j = rootStart; j < line.length(); j++) {
                char c = Character.toLowerCase(line.charAt(j));
                if (c != ',' && (c < 'a' || c > 'z')) {
                    rootEnd = j;
                    break;
                }
            }
            meaningStart = rootEnd;
        }

        String meaning = line.substring(meaningStart).replace(" ", "");
        for (String candidate : line.substring(rootStart, rootEnd).split(",")) {
            String root = candidate.replace(" ", "");
            if (root.isEmpty()) {
                continue;
            }
            if (out.length() > 0) {
                out.append(' ');
            }
            out.append(root).append(':').append(meaning);
        }
    }

    /** Reads a file and splits the content of its non-empty lines on spaces. */
    private static String[] readTokens(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = openReader(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.isEmpty()) {
                    text.append(line).append(' ');
                }
            }
        }
        return text.toString().split(" ");
    }

    /** Opens a buffered reader on the file using the platform default charset. */
    private static BufferedReader openReader(File file) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(file)));
    }

    /** Replaces the content of the file with the given text (platform default charset). */
    private static void writeText(File file, String text) throws IOException {
        try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(file))) {
            out.write(text.getBytes());
        }
    }
}
其中 TransApi 以及另外两个文件(原文此处的文件名已丢失)这三个是百度翻译相关的代码
模块二 翻译并处理
package com.chaojilaji.Util;
import net.sf.ezmorph.*;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import java.io.*;
/**
 * Translates the words of a space-separated word file in resumable batches.
 *
 * <p>For a base path {@code X} (no extension) the class works with three files:
 * {@code X.txt} holds the words, {@code X_fanyi.txt} receives one
 * {@code word: translation} line per translated word, and {@code X_jilu.txt}
 * stores how many words have been translated so far. Files are read and written
 * with the platform default charset.
 *
 * @author chaoj
 * @version 1.0
 */
public class FanYi {
    public static final String APP_ID = "";
    private static final String SECURITY_KEY = "";

    /**
     * Determines how many words of {@code name} have already been translated.
     *
     * <p>The number stored on the first line of {@code <name>_jilu.txt} is used
     * when that file exists and holds a valid integer. Otherwise the lines of
     * {@code <name>_fanyi.txt} are counted.
     *
     * @param name base path of the word file without the {@code .txt} extension
     * @return the number of words already translated, or {@code -1} when neither
     *         file provides it
     * @throws IOException if reading fails
     */
    public static int getNumberfromfile(String name) throws IOException {
        File record = new File(name + "_jilu.txt");
        if (record.exists()) {
            try (BufferedReader reader = openReader(record)) {
                String line = reader.readLine();
                if (line != null) {
                    try {
                        return Integer.parseInt(line.trim());
                    } catch (NumberFormatException ignored) {
                        // Unreadable record: fall back to counting translated lines.
                    }
                }
            }
        }

        // Derive the name from the base path, not from the record file name.
        File translated = new File(name + "_fanyi.txt");
        if (!translated.exists()) {
            System.out.println("文件不存在");
            return -1;
        }
        int count = 0;
        try (BufferedReader reader = openReader(translated)) {
            while (reader.readLine() != null) {
                count++;
            }
        }
        return count;
    }

    /**
     * Translates one word from English ({@code "en"}) to Chinese ({@code "zh"})
     * through {@code TransApi}.
     *
     * <p>The response is parsed as JSON and the {@code dst} field of the last
     * element of its {@code trans_result} array is returned.
     *
     * @param word the text to translate
     * @return the translation, or {@code null} when the response has no usable
     *         {@code trans_result} entry
     */
    public static String fanyi(String word) {
        TransApi api = new TransApi(APP_ID, SECURITY_KEY);
        String json = api.getTransResult(word, "en", "zh");
        JSONObject response = JSONObject.fromObject(json);

        Object results = response.get("trans_result");
        if (!(results instanceof JSONArray)) {
            return null;
        }
        JSONArray array = (JSONArray) results;
        int n = array.size();
        if (n == 0) {
            return null;
        }
        Object last = array.get(n - 1);
        if (!(last instanceof JSONObject)) {
            return null;
        }
        Object dst = ((JSONObject) last).get("dst");
        return dst instanceof String ? (String) dst : null;
    }

    /**
     * Translates up to {@code le} words of {@code <name>.txt}, skipping the first
     * {@code begin} words, and appends the results to {@code <name>_fanyi.txt}.
     *
     * <p>Each translated word adds a line {@code word: translation}; a failed
     * lookup is written as {@code word: null} so that the line count of the
     * output keeps matching the number of processed words. After a batch, the new
     * position is stored in {@code <name>_jilu.txt}. Nothing is written when no
     * word was translated.
     *
     * @param name  base path of the word file without the {@code .txt} extension
     * @param begin number of leading words to skip (words already translated)
     * @param le    maximum number of words to translate in this call
     * @return the number of words processed so far (skipped plus translated), or
     *         {@code begin} unchanged when the word file is missing or no word
     *         was translated
     * @throws IOException if reading or writing fails
     */
    public static int fanYiFromword(String name, int begin, int le) throws IOException {
        File source = new File(name + ".txt");
        if (!source.exists()) {
            System.out.println("文件不存在");
            return begin;
        }

        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = openReader(source)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.isEmpty()) {
                    // A line break ends a word, so keep a separator between lines.
                    text.append(line).append(' ');
                }
            }
        }

        int skip = Math.max(begin, 0);
        int seen = 0;
        int done = 0;
        StringBuilder translated = new StringBuilder();
        for (String word : text.toString().split(" ")) {
            if (done >= le) {
                break;
            }
            if (word.isEmpty()) {
                continue;
            }
            seen++;
            if (seen <= skip) {
                // Skip exactly the words handled by earlier batches.
                continue;
            }
            translated.append(word).append(": ").append(fanyi(word)).append("\n");
            done++;
        }
        if (done == 0) {
            return begin;
        }

        int position = skip + done;
        writeText(new File(name + "_fanyi.txt"), translated.toString(), true);
        writeText(new File(name + "_jilu.txt"), String.valueOf(position), false);
        return position;
    }

    /** Opens a buffered reader on the file using the platform default charset. */
    private static BufferedReader openReader(File file) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(file)));
    }

    /** Writes the text to the file, appending or replacing as requested. */
    private static void writeText(File file, String text, boolean append) throws IOException {
        try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(file, append))) {
            out.write(text.getBytes());
        }
    }
}
最后是调用模块
package com.chaojilaji;
import com.chaojilaji.Util.FanYi;
import com.chaojilaji.Util.WrodsFromFiles;
import org.apache.commons.lang.WordUtils;
import java.io.IOException;
/**
 * Command-line entry point that drives the word, root and translation helpers
 * against the files in a fixed local directory.
 *
 * <p>{@link #main} currently runs only the translation step; the other steps
 * are kept as private methods so they can be switched in by hand.
 *
 * @author chaoj
 * @version 1.0
 */
public class FenWord {
    /** Directory that holds the word list and the root table. */
    private static final String BASE_DIR =
            "C:\\Users\\chaoj\\Desktop\\读书笔记\\effictive java\\word\\src\\main\\java\\com\\chaojilaji\\";

    /** Number of words requested from the translator per call. */
    private static final int BATCH_SIZE = 100;

    /** Upper bound on the number of words translated in one program run. */
    private static final int WORDS_PER_RUN = 10000;

    /** Builds the cleaned, sorted word file from the raw word list. */
    private static void getwordFromfile() throws IOException {
        String name = BASE_DIR + "英文词库";
        WrodsFromFiles.changeFile(name);
    }

    /** Builds the {@code root:meaning} file from the raw root table. */
    private static void getrootFromfile() throws IOException {
        String roots = BASE_DIR + "english-root";
        WrodsFromFiles.changeRootFiles(roots);
    }

    /** Generates the report of words per root from the two processed files. */
    private static void tongJi() throws IOException {
        String words = BASE_DIR + "英文词库new";
        String roots = BASE_DIR + "english-root_new";
        WrodsFromFiles.tongJi(words, roots);
    }

    /**
     * Translates the processed word file in batches, resuming from the position
     * reported by {@code FanYi.getNumberfromfile}.
     *
     * <p>Each batch starts where the previous one ended. The run stops after
     * {@code WORDS_PER_RUN} words or as soon as a batch makes no progress;
     * repeating the same failing call would not help.
     */
    private static void fanyi() throws IOException {
        String name = BASE_DIR + "英文词库new";
        int start = FanYi.getNumberfromfile(name);
        if (start == -1) {
            System.out.println("无法获取当前行数,请手动打开");
            return;
        }

        int limit = start + WORDS_PER_RUN;
        int begin = start;
        while (begin < limit) {
            int next = FanYi.fanYiFromword(name, begin, BATCH_SIZE);
            if (next <= begin) {
                System.out.println("失败");
                break;
            }
            System.out.println("成功");
            begin = next;
        }
    }

    /**
     * Runs the translation step and prints the stack trace of any failure.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        try {
            fanyi();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
这里对一些特殊写法进行说明:
当读取文件中的数据时,如果每次都直接在最终字符串上进行增加,会导致大字符串的拷贝量增加,从而影响性能,所以我做了一点优化就是弄一个tmp变量,减少大字符串的赋值次数。