ElasticSearch快速实现搜索功能
- 基本概念
- 准备数据
- Java 调用
基本概念
- 映射(mapping)
- 动态映射
Elasticsearch不需要事先定义Mapping映射(相当于关系型数据库的表结构),在文档写入Elasticsearch时,会根据文档字段自动识别类型,这种机制称之为动态映射
- 静态映射
静态映射是在Elasticsearch中创建索引的时候事先定义好映射,包含文档的各字段类型、分词器等,这种方式称之为静态映射
- 分词器
- es自带的分词器,默认standard模式
- ik分词器插件,有ik_smart,ik_max_word两种模式
简单理解:standard会把一句话的每个字都拆开,ik_smart会把一句话按中文词语拆开,ik_max_word则比ik_smart拆解的粒度更细。可以用以下方式感受一下三种模式的拆解粒度(把analyzer依次换成standard、ik_smart、ik_max_word)
GET /books/_analyze
{
"analyzer": "ik_smart",
"text": "深入java虚拟机之一学就废"
}
- keyword 与 text 映射类型的区别
- keyword:精准查询, 不能分词查询,能聚合、 排序
- text:模糊查询, 能分词查询,不能聚合、排序
- match 与 term
- match:输入条件会分词,然后去匹配结果
- term:输入条件不会分词
准备数据
- 静态映射创建索引
PUT /books
{
"settings": {
"number_of_shards": 3,
"index":{
"analysis.analyzer.default.type": "ik_max_word"
}
},
"mappings": {
"doc":{
"properties": {
"title":{
"type": "text",
"analyzer":"standard",
"search_analyzer":"standard"
},
"content":{
"type": "text",
"analyzer":"ik_max_word"
}
}
}
}
}
可以通过 GET /books/_mapping 查看映射
- 批量创建文档
PUT _bulk
{"index":{"_index":"books","_type":"doc","_id":1}}
{"id":1,"title":"深入java虚拟机","content":"深入java虚拟机之一学就废"}
{"index":{"_index":"books","_type":"doc","_id":2}}
{"id":2,"title":"深入SpringBoot虚拟","content":"深入SpringBoot之虚拟世界一学就废"}
{"index":{"_index":"books","_type":"doc","_id":3}}
{"id":3,"title":"疯狂java讲义","content":"疯狂java讲义之一学就废"}
{"index":{"_index":"books","_type":"doc","_id":4}}
{"id":4,"title":"项目管理知识体系指南","content":"项目管理知识体系指南之一学就废"}
{"index":{"_index":"books","_type":"doc","_id":5}}
{"id":5,"title":"短线炒股实战","content":"短线炒股实战之一学就废"}
- 查询
- 输入“深虚机”,要求查出title包含这几个字的
GET /books/doc/_search
{
"query":{
"match": {
"title": {
"query": "深虚机",
"operator": "and"
}
}
},
"highlight": {
"fields": {
"title": {},
"content": {}
}
}
}
"operator": "and" 表示输入条件分词后,每个词都必须匹配,文档才会返回;"or" 表示输入条件分词后,只需匹配其中任意一个词,文档就会返回
- 输入“深虚机”,要求查出content包含这几个字的;
输入“深入虚拟”,要求查出content包含这几个字的
GET /books/doc/_search
{
"query":{
"match": {
"content": {
"query": "深虚机",
"operator": "and"
}
}
},
"highlight": {
"fields": {
"title": {},
"content": {}
}
}
}
可以看到,第一种搜不到结果,第二种可以搜到结果,同时第一种搜title的时候是可以搜到结果的
这个就是因为title是用standard分词的,“深虚机”分词后是“深”,“虚”,“机”,能跟“深入java虚拟机”分词后的结果匹配上;而content是用ik_max_word,“深入java虚拟机之一学就废”分词之后如下所示,以至于“深”,“虚”,“机”匹配不上
{
"tokens": [
{
"token": "深入",
"start_offset": 0,
"end_offset": 2,
"type": "CN_WORD",
"position": 0
},
{
"token": "java",
"start_offset": 2,
"end_offset": 6,
"type": "ENGLISH",
"position": 1
},
{
"token": "虚拟机",
"start_offset": 6,
"end_offset": 9,
"type": "CN_WORD",
"position": 2
},
{
"token": "虚拟",
"start_offset": 6,
"end_offset": 8,
"type": "CN_WORD",
"position": 3
},
{
"token": "机",
"start_offset": 8,
"end_offset": 9,
"type": "CN_CHAR",
"position": 4
},
{
"token": "之一",
"start_offset": 9,
"end_offset": 11,
"type": "CN_WORD",
"position": 5
},
{
"token": "一",
"start_offset": 10,
"end_offset": 11,
"type": "TYPE_CNUM",
"position": 6
},
{
"token": "学",
"start_offset": 11,
"end_offset": 12,
"type": "CN_CHAR",
"position": 7
},
{
"token": "就",
"start_offset": 12,
"end_offset": 13,
"type": "CN_CHAR",
"position": 8
},
{
"token": "废",
"start_offset": 13,
"end_offset": 14,
"type": "CN_CHAR",
"position": 9
}
]
}
- 多字段模糊匹配
GET /books/doc/_search
{
"query": {
"multi_match": {
"query": "深虚拟",
"operator": "and",
"fields": ["title", "content"]
}
},
"highlight": {
"fields": {
"title": {},
"content": {}
}
}
}
- 返回结果对搜索词高亮显示
查询时加上highlight参数,返回结果会在搜索词上自动加上<em>标签,这是默认的,可以自定义
"highlight": {
"pre_tags": ["<span style='color:red'>"],
"post_tags": ["</span>"],
"fields": {
"title": {},
"content" : {"fragment_size" : 150, "number_of_fragments" : 3 }
}
}
fragment_size:设置要显示出来的fragment文本片段的长度,默认是100
number_of_fragments:指定显示高亮的fragment文本片段数量
Java 调用
- maven依赖
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>${es.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.78</version>
</dependency>
- 核心代码
/**
 * Demo REST controller that fires a fixed title search against the {@code books} index.
 */
@RestController
@RequestMapping("/test")
public class TestController {

    /**
     * Builds a match query for "深虚机" on the {@code title} field with the AND operator
     * and hands it to {@code EsTools.searchQuery}. The response body is always the
     * literal string {@code "success"}.
     */
    @GetMapping("/hello.do")
    public String queryFromEs() throws UnsupportedEncodingException {
        QueryBuilder titleQuery = QueryBuilders
                .matchQuery("title", "深虚机")
                .operator(Operator.AND);
        EsTools tools = EsTools.getInstance();
        tools.searchQuery("books", titleQuery);
        return "success";
    }
}
package com.tyd.utils;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutionException;
/**
* 访问ES的工具类
*
* @see 命令行参考https://github.com/elastic/elasticsearch
*/
public class EsTools {
/**
* 查看ES路径:elasticsearch-7.5.1\config\elasticsearch.yml cluster.name
*/
private static final String CLUSTER_NAME = "my-application";
private static EsTools esTool;
private static TransportClient transportClient;
/**
 * Returns the shared {@code EsTools} instance, creating it on first use.
 *
 * <p>The method is synchronized so that concurrent first calls (for example from
 * several web request threads) cannot each construct a separate instance.
 *
 * @return the single shared instance, never {@code null}
 */
public static synchronized EsTools getInstance() {
    if (esTool == null) {
        esTool = new EsTools();
    }
    return esTool;
}
/**
 * Returns the cached transport client, creating it on first use so that a new
 * connection is not opened for every request.
 *
 * <p>The cache is a static field, so initialization is guarded by the class lock;
 * without it two threads could both see {@code null} and each open a client,
 * leaking one of them.
 *
 * @return the cached client; {@code null} only if {@code getNewClient()} returned {@code null}
 */
private TransportClient getClient() {
    synchronized (EsTools.class) {
        if (transportClient == null) {
            transportClient = getNewClient();
        }
        return transportClient;
    }
}
/**
 * Manual entry point for trying out the individual helper methods.
 *
 * <p>The operation to run is selected by the hard-coded {@code operate} value;
 * edit it to exercise a different method. With the current value ("search")
 * nothing is executed because the sample queries are only shown as comments.
 *
 * @param args ignored
 * @throws IOException declared for callers; not thrown by the code visible here
 */
public static void main(String[] args) throws IOException {
    String operate = "search";
    switch (operate) {
        case "addIndicesMapping": {
            // Create the index together with its mapping.
            // Expected output on success: ----------Add mapping success----------true
            String mappingJson = "{\"properties\":{\"author\":{\"type\":\"keyword\"},\"title\":{\"type\":\"text\"},\"content\":{\"type\":\"text\"},\"price\":{\"type\":\"integer\"},\"date\":{\"type\":\"date\",\"format\":\"yyyy-MM-dd HH:mm:ss\"}}}";
            getInstance().createIndexAndMapping("indices_test", mappingJson);
            break;
        }
        case "addDocument":
            // Expected output: Add id success,version is :1
            getInstance().addIndexDocument("indices_test", "_doc");
            break;
        case "addOrUpdateDocument":
            // Expected output: bulk success
            getInstance().bulkIndexDocument("indices_test", "_doc");
            break;
        case "deleteById":
            getInstance().deleteById("indices_test", "_doc", "id_003");
            break;
        case "batchDeleteByIds": {
            List<String> targetIds = new ArrayList<>();
            targetIds.add("id_001");
            targetIds.add("id_002");
            getInstance().batchDeleteByIds("indices_test", "_doc", targetIds);
            break;
        }
        case "updateDocument":
            // Expected output: result is OK (the id argument is the document _id)
            getInstance().updateDocument("indices_test", "_doc", "TNZDUHEB_rQdj7R3LnO4", null);
            break;
        case "updateDocumentPrepare":
            // Expected output: result is UPDATED (the id argument is the document _id)
            getInstance().updateDocumentPrepare("indices_test", "_doc", "TNZDUHEB_rQdj7R3LnO4", null);
            break;
        case "searchByIndex":
            // The id argument is the document _id
            getInstance().searchByIndex("indices_test", "_doc", "TNZDUHEB_rQdj7R3LnO4");
            break;
        case "queryAll":
            // Sample result tail: ..."totalHits":{"value":3,"relation":"EQUAL_TO"},"maxScore":1.0}
            getInstance().queryAll("indices_test");
            break;
        case "search":
            // Query everything:
            // QueryBuilder queryBuilder = QueryBuilders.matchAllQuery();
            // getInstance().searchQuery("indices_test", queryBuilder);
            //
            // The snippets below only illustrate the shape of other query conditions.
            // Span First:
            // QueryBuilder queryBuilder = QueryBuilders.spanFirstQuery(
            //         QueryBuilders.spanTermQuery("title", "title"), 1);
            // Span Near:
            // QueryBuilder queryBuilder = QueryBuilders.spanNearQuery(QueryBuilders.spanTermQuery("title", "title"), 1000)
            //         .addClause(QueryBuilders.spanTermQuery("title", "title_001"))
            //         .addClause(QueryBuilders.spanTermQuery("title", "title_002"))
            //         .addClause(QueryBuilders.spanTermQuery("title", "title_003"));
            // ...
            break;
        default:
            break;
    }
}
/**
 * Runs the given query against an index and prints every hit, serialized as JSON,
 * to standard output.
 *
 * @param index        name of the index to search
 * @param queryBuilder query to execute
 */
public void searchQuery(String index, QueryBuilder queryBuilder) {
    SearchResponse searchResult = getClient()
            .prepareSearch(index)
            .setQuery(queryBuilder)
            .get();
    Iterator<SearchHit> cursor = searchResult.getHits().iterator();
    while (cursor.hasNext()) {
        SearchHit hit = cursor.next();
        System.out.println(JSON.toJSONString(hit));
    }
}
/**
 * Fetches a single document by index, type and id and prints its source,
 * or "no result" when the response carries no source.
 *
 * @param index index name
 * @param type  mapping type
 * @param id    document {@code _id}
 */
public void searchByIndex(String index, String type, String id) {
    GetResponse found = getClient()
            .prepareGet(index, type, id)
            .execute()
            .actionGet();
    String source = found.getSourceAsString();
    System.out.println(source == null ? "no result" : source);
}
/**
 * Partially updates a document through the {@code prepareUpdate} builder API.
 *
 * @param index  index name
 * @param type   mapping type
 * @param id     document {@code _id}
 * @param source fields to merge into the document; when {@code null} a built-in
 *               sample document ({@code author = test_prepare_001}) is used instead
 * @return {@code true} when the response result is {@code UPDATED}; {@code false}
 *         otherwise, including when building the sample document fails
 */
public boolean updateDocumentPrepare(String index, String type, String id, XContentBuilder source) {
    try {
        // Honor the caller's document; fall back to the sample only when none is given.
        XContentBuilder doc = source;
        if (doc == null) {
            doc = XContentFactory.jsonBuilder()
                    .startObject()
                    .field("author", "test_prepare_001")
                    .endObject();
        }
        UpdateResponse response = getClient().prepareUpdate(index, type, id).setDoc(doc).get();
        String result = response.getResult().name();
        System.out.println("result is " + result);
        return "UPDATED".equals(result);
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
/**
 * Partially updates a document through an explicit {@code UpdateRequest}.
 *
 * @param index  index name
 * @param type   mapping type
 * @param id     document {@code _id}
 * @param source fields to merge into the document; when {@code null} a built-in
 *               sample document (author/title/content/date) is used instead
 * @return {@code true} when the response status is {@code OK}; {@code false}
 *         otherwise, including when the request fails or the thread is interrupted
 */
@SuppressWarnings("deprecation")
public boolean updateDocument(String index, String type, String id, XContentBuilder source) {
    UpdateRequest updateRequest = new UpdateRequest();
    updateRequest.index(index);
    updateRequest.type(type);
    updateRequest.id(id);
    try {
        // Honor the caller's document; fall back to the sample only when none is given.
        XContentBuilder doc = source;
        if (doc == null) {
            String now = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
            doc = XContentFactory.jsonBuilder()
                    .startObject()
                    .field("author", "author001")
                    .field("title", "title001")
                    .field("content", "content001")
                    .field("date", now)
                    .endObject();
        }
        updateRequest.doc(doc);
        UpdateResponse response = getClient().update(updateRequest).get();
        String status = response.status().name();
        System.out.println("result is " + status);
        return "OK".equals(status);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up can still observe it.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (IOException | ExecutionException e) {
        e.printStackTrace();
    }
    return false;
}
/**
 * Deletes several documents in one bulk request.
 *
 * @param index index name
 * @param type  mapping type
 * @param ids   document ids to delete
 * @return {@code true} when {@code ids} is null/empty (nothing is sent) or the bulk
 *         response reports no failures; {@code false} when any item failed
 */
public boolean batchDeleteByIds(String index, String type, List<String> ids) {
    boolean nothingToDelete = (ids == null) || ids.isEmpty();
    if (nothingToDelete) {
        System.out.println("ids is require");
        return true;
    }

    BulkRequestBuilder bulk = getClient().prepareBulk();
    Iterator<String> idCursor = ids.iterator();
    while (idCursor.hasNext()) {
        String currentId = idCursor.next();
        bulk.add(getClient().prepareDelete(index, type, currentId).request());
    }

    BulkResponse outcome = bulk.get();
    System.out.println(outcome.status());
    if (!outcome.hasFailures()) {
        return true;
    }
    System.out.println("has failed, " + outcome.status().name());
    return false;
}
/**
 * Deletes one document identified by index, type and id, and prints whether
 * the response status was {@code OK}.
 *
 * @param indexName index name
 * @param type      mapping type
 * @param id        document {@code _id}
 */
public void deleteById(String indexName, String type, String id) {
    DeleteResponse deletion = getClient()
            .prepareDelete(indexName, type, id)
            .execute()
            .actionGet();
    boolean succeeded = "OK".equals(deletion.status().name());
    if (!succeeded) {
        System.out.println("delete id failed : " + deletion.getResult().toString());
        return;
    }
    System.out.println("delete id success");
}
/**
 * Deletes an index (and therefore all documents in it) if it exists.
 *
 * <p>The existence check comes first because the delete call is only issued for
 * an index that is present; a missing index is reported and treated as success.
 *
 * @param indexName name of the index to delete
 * @return {@code false} when {@code indexName} is null/blank; {@code true} when the
 *         index does not exist; otherwise whether the delete was acknowledged
 */
public boolean deleteAllIndex(String indexName) {
    if (indexName == null || indexName.trim().isEmpty()) {
        System.out.println("Error: index name is require.");
        return false;
    }

    IndicesExistsRequest existsRequest = new IndicesExistsRequest(indexName);
    IndicesExistsResponse existence = getClient()
            .admin()
            .indices()
            .exists(existsRequest)
            .actionGet();
    if (!existence.isExists()) {
        System.out.println("index is not existed");
        return true;
    }

    AcknowledgedResponse deletion = getClient()
            .admin()
            .indices()
            .prepareDelete(indexName)
            .execute()
            .actionGet();
    System.out.println("delete index date, result is " + deletion.isAcknowledged());
    return deletion.isAcknowledged();
}
/**
 * Runs a match-all query against an index and returns the hits serialized as JSON.
 *
 * @param index index name
 * @return JSON string produced from the response's {@code SearchHits}
 */
public String queryAll(String index) {
    QueryBuilder everything = QueryBuilders.matchAllQuery();
    SearchHits allHits = getClient()
            .prepareSearch(index)
            .setQuery(everything)
            .get()
            .getHits();
    return JSON.toJSONString(allHits);
}
/**
 * Indexes (creates or overwrites) two sample documents, {@code id_002} and
 * {@code id_003}, in a single bulk request and prints the outcome.
 *
 * @param index index name
 * @param type  mapping type
 */
public void bulkIndexDocument(String index, String type) {
    String now = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
    try {
        BulkRequestBuilder bulkRequest = getClient().prepareBulk();
        bulkRequest.add(getClient().prepareIndex(index, type, "id_002")
                .setSource(buildSampleDocument("002", "20", now)));
        bulkRequest.add(getClient().prepareIndex(index, type, "id_003")
                .setSource(buildSampleDocument("003", "30", now)));

        BulkResponse bulkResponse = bulkRequest.get();
        if (bulkResponse.hasFailures()) {
            // Include the per-item failure details, not just the elapsed time.
            System.out.println("bulk has failed and took " + bulkResponse.getTook()
                    + ": " + bulkResponse.buildFailureMessage());
        } else {
            System.out.println("bulk success");
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/**
 * Builds one sample document whose id/author/title/content values share a suffix.
 *
 * @param suffix value appended to {@code id_}, {@code author_}, {@code title_} and {@code content_}
 * @param price  value written to the {@code price} field
 * @param date   value written to the {@code date} field, formatted {@code yyyy-MM-dd HH:mm:ss}
 * @return the finished JSON document builder
 * @throws IOException if the JSON builder cannot be created or written
 */
private XContentBuilder buildSampleDocument(String suffix, String price, String date) throws IOException {
    return XContentFactory.jsonBuilder()
            .startObject()
            .field("id", "id_" + suffix)
            .field("author", "author_" + suffix)
            .field("title", "title_" + suffix)
            .field("content", "content_" + suffix)
            .field("price", price)
            .field("date", date)
            .endObject();
}
/**
 * Indexes one sample document ({@code id_001}) without an explicit document id
 * and prints the version from the response.
 *
 * <p>The field values are hard-coded; to index real data, pass an object in and
 * build the source from it instead.
 *
 * @param index index name
 * @param type  mapping type
 */
public void addIndexDocument(String index, String type) {
    String now = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
    try {
        XContentBuilder document = XContentFactory.jsonBuilder()
                .startObject()
                .field("id", "id_001")
                .field("author", "author_001")
                .field("title", "title_001")
                .field("content", "content_001")
                .field("price", "10")
                .field("date", now)
                .endObject();
        // Use this instance's client accessor directly, like every other method here.
        IndexResponse response = getClient().prepareIndex(index, type).setSource(document).get();
        System.out.println("Add id success,version is :" + response.getVersion());
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Creates an index and registers a mapping for it in the same request.
 *
 * <p>The mapping is registered under the fixed type name {@code content}.
 *
 * @param indiceName name of the index to create
 * @param json       mapping definition as a JSON object string
 * @return {@code false} when {@code indiceName} is null/blank; otherwise whether
 *         the create request was acknowledged
 */
public boolean createIndexAndMapping(String indiceName, String json) {
    boolean nameMissing = indiceName == null || indiceName.trim().isEmpty();
    if (nameMissing) {
        System.out.println("indice is required");
        return false;
    }

    final String mappingType = "content";
    CreateIndexRequestBuilder createRequest = getClient().admin().indices().prepareCreate(indiceName);
    XContentBuilder mapping = generateMappingBuilder(json);
    createRequest.addMapping(mappingType, mapping);

    CreateIndexResponse created = createRequest.execute().actionGet();
    String banner = created.isAcknowledged()
            ? "----------Add mapping success----------"
            : "----------Add mapping failed-----------";
    System.out.println(banner + created.isAcknowledged());
    return created.isAcknowledged();
}
/**
 * Builds the {@code XContentBuilder} for a mapping from its JSON description.
 *
 * @param object the mapping, either as a JSON object string or as an already
 *               parsed {@code JSONObject}
 * @return the populated builder; may be {@code null} or incomplete if an
 *         {@code IOException} occurred while creating the JSON builder
 * @throws IllegalArgumentException if {@code object} is neither a JSON object
 *         string nor a {@code JSONObject}, or parses to nothing
 */
private XContentBuilder generateMappingBuilder(Object object) {
    JSONObject jsonObj = null;
    if (object instanceof JSONObject) {
        jsonObj = (JSONObject) object;
    } else if (object instanceof String) {
        jsonObj = JSON.parseObject((String) object);
    }
    if (jsonObj == null) {
        // Without a JSON object the builder would be left with a dangling "{".
        throw new IllegalArgumentException("mapping must be a JSON object string or a JSONObject");
    }

    XContentBuilder builder = null;
    try {
        builder = XContentFactory.jsonBuilder();
        generateMappingBuilder(jsonObj, builder, true);
    } catch (IOException e) {
        e.printStackTrace();
        System.out.println("get json builder error");
    }
    return builder;
}
/**
 * Recursively copies a parsed JSON mapping into an {@code XContentBuilder}.
 *
 * <p>Whether a key opens a nested object is decided by the type of its value,
 * not by its name. Nested JSON objects become nested builder objects, arrays
 * are written as array fields, {@code null} becomes a null field, and every
 * other value is written as a string attribute. This keeps the output balanced
 * for attributes other than {@code type}/{@code format} (for example
 * {@code analyzer}) and for document fields that happen to be named
 * {@code type} or {@code format}.
 *
 * @param objJson the JSON node to copy: a {@code JSONObject} or a {@code JSONArray}
 *                of nodes; any other value writes nothing
 * @param builder destination builder
 * @param isBegin {@code true} for the root call, which must open the root object itself;
 *                recursive calls pass {@code false} because the caller has already
 *                opened the object this call will close
 */
private void generateMappingBuilder(Object objJson, XContentBuilder builder, boolean isBegin) {
    try {
        // The root call opens the outermost "{".
        if (isBegin) {
            builder.startObject();
        }
        if (objJson instanceof JSONArray) {
            for (Object element : (JSONArray) objJson) {
                generateMappingBuilder(element, builder, false);
            }
        } else if (objJson instanceof JSONObject) {
            JSONObject jsonObject = (JSONObject) objJson;
            for (String key : jsonObject.keySet()) {
                Object value = jsonObject.get(key);
                if (value instanceof JSONObject) {
                    // Opened here, closed by the endObject() at the end of the recursive call.
                    builder.startObject(key);
                    generateMappingBuilder(value, builder, false);
                } else if (value instanceof JSONArray) {
                    // Written as a self-contained array field (elements may be maps or scalars).
                    builder.field(key, value);
                } else if (value == null) {
                    builder.nullField(key);
                } else {
                    builder.field(key, value.toString());
                }
            }
            // Closes the object opened by the caller (or by isBegin for the root).
            builder.endObject();
        }
    } catch (IOException e) {
        e.printStackTrace();
        System.out.println("generate mapping builder failed");
    }
}
/**
 * Builds a fixed sample mapping with the fields {@code author} (keyword),
 * {@code title} and {@code content} (text), {@code price} (integer) and
 * {@code date} (date, format {@code yyyy-MM-dd HH:mm:ss}).
 *
 * @return the finished mapping builder
 * @throws IOException if the JSON builder cannot be created or written
 */
private XContentBuilder generateMapping() throws IOException {
    XContentBuilder mapping = XContentFactory.jsonBuilder();

    // Root object and the "properties" container for the custom fields.
    mapping.startObject();
    mapping.startObject("properties");

    mapping.startObject("author").field("type", "keyword").endObject();
    mapping.startObject("title").field("type", "text").endObject();
    mapping.startObject("content").field("type", "text").endObject();
    mapping.startObject("price").field("type", "integer").endObject();

    // Date field with an explicit format.
    mapping.startObject("date");
    mapping.field("type", "date");
    mapping.field("format", "yyyy-MM-dd HH:mm:ss");
    mapping.endObject();

    // Close "properties", then the root object.
    mapping.endObject();
    mapping.endObject();
    return mapping;
}
/**
 * Opens a new transport client for the cluster named by {@code CLUSTER_NAME},
 * connecting to 127.0.0.1:9300.
 *
 * <p>Sniffing is switched on via {@code client.transport.sniff}; the alternative
 * is to list every node with {@code addTransportAddress}.
 *
 * @return the connected client, or {@code null} if the host could not be resolved
 */
private TransportClient getNewClient() {
    try {
        Settings clusterSettings = Settings.builder()
                .put("cluster.name", CLUSTER_NAME)
                .put("client.transport.sniff", true)
                .build();
        PreBuiltTransportClient bareClient = new PreBuiltTransportClient(clusterSettings);
        TransportAddress localNode = new TransportAddress(InetAddress.getByName("127.0.0.1"), 9300);
        return bareClient.addTransportAddress(localNode);
    } catch (UnknownHostException e) {
        e.printStackTrace();
        System.out.println("get host error");
        return null;
    }
}
}