1.准备工作(jdk1.8)
使用之前需要获取对应的项目API_KEY,SECRET_KEY,这些参数在使用API的时候必须用到,用于生成access_token.
如何获取这些参数?
http://ai.baidu.com/tech/ocr/general
登录后创建一个应用
点击创建应用,填入对应的信息.点击应用列表就可以得到API_KEY,SECRET_KEY了
2.下载java的SDK,并将jar包引入项目
3.引入jar包(百度云:链接:https://pan.baidu.com/s/1L6cQcFexhBqj1gjA9_FLFA 提取码:q65z )
预览:
4.代码实现(注意:很多人在Eclipse中无法直接使用BASE64Encoder这个类,解决办法详见:https://jingyan.baidu.com/article/066074d627f506c3c21cb0aa.html)
1 package com.it;
2
3 import java.io.BufferedReader;
4 import java.io.InputStreamReader;
5 import java.net.HttpURLConnection;
6 import java.net.URL;
7 import java.util.List;
8 import java.util.Map;
9
10 import org.json.JSONObject;
11
12 /**
13 *
14 * @author csh
15 *
16 * 获取token类
17 */
18 public class AuthService {
19
20 /**
21 * 获取权限token
22 * @return 返回token
23 */
24
25 public static String getAuth() {
26 // 官网获取的 API Key 更新为你注册的
27 String clientId = "frNrube3CEybwGiXUOgrYH23";
28 // 官网获取的 Secret Key 更新为你注册的
29 String clientSecret = "05He2i4heaL21TgPbQeqWrt3u8Qh4RCl";
30 return getAuth(clientId, clientSecret);
31 }
32
33 /**
34 * 获取API访问token 该token有一定的有效期,需要自行管理,当失效时需重新获取.
35 *
36 * @param ak - 百度云官网获取的 API Key
37 * @param sk - 百度云官网获取的 Securet Key
38 */
39 public static String getAuth(String ak, String sk) {
40 // 获取token地址
41 String authHost = "https://aip.baidubce.com/oauth/2.0/token?";
42 String getAccessTokenUrl = authHost
43 // 1. grant_type为固定参数
44 + "grant_type=client_credentials"
45 // 2. 官网获取的 API Key
46 + "&client_id=" + ak
47 // 3. 官网获取的 Secret Key
48 + "&client_secret=" + sk;
49 try {
50 URL realUrl = new URL(getAccessTokenUrl);
51 // 打开和URL之间的连接
52 HttpURLConnection connection = (HttpURLConnection) realUrl.openConnection();
53 connection.setRequestMethod("GET");
54 connection.connect();
55 // 获取所有响应头字段
56 Map<String, List<String>> map = connection.getHeaderFields();
57 // 遍历所有的响应头字段
58 for (String key : map.keySet()) {
59 System.err.println(key + "--->" + map.get(key));
60 }
61 // 定义 BufferedReader输入流来读取URL的响应
62 BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
63 String result = "";
64 String line;
65 while ((line = in.readLine()) != null) {
66 result += line;
67 }
68 /**
69 * 返回结果示例
70 */
71 System.err.println("result:" + result);
72 JSONObject jsonObject = new JSONObject(result);
73 String access_token = jsonObject.getString("access_token");
74 return access_token;
75 } catch (Exception e) {
76 System.err.printf("获取token失败!");
77 e.printStackTrace(System.err);
78 }
79 return null;
80 }
81 }
1 package com.it;
2
3 import sun.misc.BASE64Encoder;
4
5 import java.io.FileInputStream;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.net.URLEncoder;
9
/**
 * Converts local image files into the Base64 + URL-encoded string form used as the
 * {@code image} request parameter.
 *
 * <p>This class no longer relies on the JDK-internal {@code sun.misc.BASE64Encoder};
 * the corresponding import in this file is unused and can be dropped (that class does
 * not exist on JDK 9 and later).
 *
 * @author csh
 */
public class BaseImg64 {

    /**
     * Reads a local image and returns its content Base64-encoded and then URL-encoded.
     *
     * @param imgPath path of the image file on the local file system
     * @return the URL-encoded Base64 representation of the file's bytes (single line,
     *         no line separators)
     * @throws java.io.UncheckedIOException if the file cannot be read
     */
    public static String getImageStrFromPath(String imgPath) {
        try {
            // Read the complete file; available() + a single read() is not guaranteed to do so
            byte[] data = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(imgPath));
            // Standard Base64 without the line breaks that sun.misc.BASE64Encoder inserts
            String base64 = java.util.Base64.getEncoder().encodeToString(data);
            // Explicit charset instead of the deprecated platform-default overload
            return URLEncoder.encode(base64, "UTF-8");
        } catch (IOException e) {
            // Fail with the real cause instead of continuing with missing data
            throw new java.io.UncheckedIOException("Failed to read image: " + imgPath, e);
        }
    }
}
1 package com.it;
2
3 import java.io.File;
4 import java.io.IOException;
5 import java.net.URI;
6 import java.net.URISyntaxException;
7
8 import org.apache.http.HttpResponse;
9 import org.apache.http.client.HttpClient;
10 import org.apache.http.client.methods.HttpPost;
11 import org.apache.http.entity.StringEntity;
12 import org.apache.http.impl.client.DefaultHttpClient;
13 import org.apache.http.util.EntityUtils;
14 /**
15 *
16 * @author csh
17 *图像文字识别
18 */
19 public class Check {
20
21 private static final String POST_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=" + AuthService.getAuth();
22
23 /**
24 * 识别本地图片的文字
25 */
26 public static String checkFile(String path) throws URISyntaxException, IOException {
27 File file = new File(path);
28 if (!file.exists()) {
29 throw new NullPointerException("图片不存在");
30 }
31 String image = BaseImg64.getImageStrFromPath(path);
32 String param = "image=" + image;
33 return post(param);
34 }
35
36 /**
37 * 图片url
38 * 识别结果,为json格式
39 */
40 public static String checkUrl(String url) throws IOException, URISyntaxException {
41 String param = "url=" + url;
42 return post(param);
43 }
44
45 /**
46 * 通过传递参数:url和image进行文字识别
47 */
48 private static String post(String param) throws URISyntaxException, IOException {
49 //开始搭建post请求
50 HttpClient httpClient = new DefaultHttpClient();
51 HttpPost post = new HttpPost();
52 URI url = new URI(POST_URL);
53 post.setURI(url);
54
55 //设置请求头,请求头必须为application/x-www-form-urlencoded,因为是传递一个很长的字符串,不能分段发送
56 post.setHeader("Content-Type", "application/x-www-form-urlencoded");
57 StringEntity entity = new StringEntity(param);
58 post.setEntity(entity);
59 HttpResponse response = httpClient.execute(post);
60 System.out.println(response.toString());
61 if (response.getStatusLine().getStatusCode() == 200) {
62 String str;
63 try {
64 //读取服务器返回过来的json字符串数据
65 str = EntityUtils.toString(response.getEntity());
66 //很重要,主要是解决返回中文的编码格式。
67 str = new String(str.getBytes("ISO-8859-1"),"UTF-8");
68 System.out.println(str);
69 return str;
70 } catch (Exception e) {
71 e.printStackTrace();
72 return null;
73 }
74 }
75 return null;
76 }
77
78 /**
79 * 执行方法
80 * @param args
81 */
82 public static void main(String[] args) {
83 //图片路径
84 String path = "E:\\newspace\\CDBC5937-A78B-41e7-BC12-30F921D3264F.png";
85
86 try {
87 long now = System.currentTimeMillis();
88 checkFile(path);
89 System.out.println("耗时:" + (System.currentTimeMillis() - now) / 1000 + "s");
90 } catch (URISyntaxException | IOException e) {
91 e.printStackTrace();
92 }
93 }
94 }
5.实现结果
实验图片:
实验结果:
{
"log_id": 6134374853401645554,
"words_result_num": 14,
"words_result": [{
"words": "实时热点"
}, {
"words": "换一换"
}, {
"words": "端火锅泼妻子同学默克尔呼吸急促"
}, {
"words": "高楼坠刀嫌疑人"
}, {
"words": "常州奔驰连撞多车"
}, {
"words": "菲律宾打击性骚扰新张扣扣被执行死刑"
}, {
"words": "王思聪股权被冻结章莹颖案结案陈词"
}, {
"words": "荷兰弟恋情晛光"
}, {
"words": "孙杨暴力抗检听证"
}, {
"words": "强生爽身粉致癌案"
}, {
"words": "脑机接口系统"
}, {
"words": "个人破产制度试点走98800步遭质疑新"
}, {
"words": "007主角变成黑人"
}, {
"words": "华为申请专利"
}]
}
实验总结:百度的图片文字识别API还是挺厉害的,比Tess4J厉害多了。但是有次数限制,个人免费额度为“5000次/人/天”。不过用来学习还是可以的。如有不足之处,请各位大佬指点!