Table of Contents
- 0 Project Overview
- 1 System Modules
- 2 Analysis Content
- 3 UI Screenshots
- 4 Project Source Code
0 Project Overview
NetEase Cloud Music data analysis based on Spark.
Note: suitable for a course project or a graduation design; the workload meets the usual requirements, and the source code is open.
Project repository:
https://gitee.com/asoonis/feed-neo
1 System Modules
The project includes a crawler, Scala code, Spark, Hadoop, Elasticsearch, Logstash, Flume, ECharts, and Log4j.
- emotional_analysis_spider: crawler module
- emotional_analysis_web: data processing module (Scala code)
- emotional_analysis_recommend: recommendation module (not yet developed)
- emotional_analysis_web: report presentation module
- Crawler-sample-data: sample data collected by the crawler
- MySQL-yuncun: MySQL database schema and data
- config: Elasticsearch, Logstash, and Flume configuration files (a minimal Log4j logging sketch follows this list)
- mlib-text: test data for machine-learning classification
- statistical-static-txt: static data joined in Spark SQL
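The ESClientTest code in section 4 reads from a daily log index named log4j-2018.02.19, which is presumably fed by the Log4j, Flume, and Logstash configuration kept in the config module. As a rough, hypothetical sketch of the application side of that pipeline (the class name and log messages below are made up, and the appender wiring itself lives in the config files, not here), logging through Log4j 1.x looks like this:

```java
import org.apache.log4j.Logger;

public class PipelineLogSketch {

    // Hypothetical demo class: any module (for example the crawler) can log through Log4j.
    // With the Flume/Logstash appender from the config module in place, these events end up
    // in a daily Elasticsearch index such as log4j-2018.02.19, which the ESClientTest methods
    // testGet() and testGet1() in section 4 query.
    private static final Logger LOG = Logger.getLogger(PipelineLogSketch.class);

    public static void main(String[] args) {
        LOG.info("crawler batch finished");
        LOG.warn("retrying a comment request");
    }
}
```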
2 Analysis Content
1. Graph computation
2. Song category prediction with machine learning
3. Comment word cloud
4. Comment counts by time period
5. Top comments ranking
6. Hot songs ranking
7. User gender ratio (see the Spark sketch after this list)
8. User zodiac-sign distribution
9. User age distribution
10. Nationwide geographic distribution of users
11. Hot-comment search, etc.
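To make one of these statistics concrete, the following is a minimal Spark SQL sketch in Java (matching the language of the source below) for the user gender ratio. The input path, file format, and the gender field are assumptions made for illustration; the project's real processing is implemented in the Scala data processing module.

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class GenderRatioSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("user-gender-ratio")
                .master("local[*]")
                .getOrCreate();

        // Hypothetical input: one JSON user profile per line with a "gender" field
        // (the file name, field name, and value encoding are assumptions, not taken from the project).
        Dataset<Row> users = spark.read().json("users.json");

        // Count users per gender value; the counts could then be written to MySQL or
        // Elasticsearch and rendered as an ECharts pie chart by the report module.
        users.groupBy("gender").count().show();

        spark.stop();
    }
}
```

The same groupBy/count pattern covers the zodiac-sign, age, and regional distributions by swapping the grouping column.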
3 UI Screenshots
4 Project Source Code
package org.bianqi.wangyi.web.es.test;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.bianqi.web.wangyi.utils.ESClientUtils;
import org.bianqi.web.wangyi.utils.PageBean;
import org.bianqi.web.wangyi.utils.SearchUtils;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.search.highlight.HighlightField;
import org.junit.Before;
import org.junit.Test;
import com.github.pagehelper.PageHelper;
import com.github.pagehelper.PageInfo;
/**
 * ES client test
 * <p>Title: ESClientTest</p>
 * <p>Description: TransportClient examples for indexing and searching the project's Elasticsearch data</p>
 * <p>School: qiqihar university</p>
 * @author BQ
 * @date 2018-02-23 18:46:19
 * @version 1.0.0
 */
public class ESClientTest {
private TransportClient client;
private IndexRequest source;
@Before
public void before11() throws Exception {
Settings settings = Settings.settingsBuilder().put("client.transport.ignore_cluster_name", true) // ignore cluster name validation
.build();
client = TransportClient.builder().settings(settings).build()
.addTransportAddress(new InetSocketTransportAddress(new InetSocketAddress("192.168.254.215", 9300)));
System.out.println("success connect");
}
/**
 * Print the addresses of the connected cluster nodes
 */
@Test
public void testInfo() {
List<DiscoveryNode> nodes = client.connectedNodes();
for (DiscoveryNode node : nodes) {
System.out.println(node.getHostAddress());
}
}
/**
 * Build the JSON document, approach 1: plain string concatenation
 */
public String createJson1() {
String json = "{" +
"\"user\":\"kimchy\"," +
"\"postDate\":\"2013-01-30\"," +
"\"message\":\"trying out Elasticsearch\"" +
"}";
return json;
}
/**
 * Build the JSON document from a Map
 */
public Map<String, Object> createJson2() {
Map<String,Object> json = new HashMap<String, Object>();
json.put("user", "kimchy");
json.put("postDate", new Date());
json.put("message", "trying out elasticsearch");
return json;
}
/**
 * Build the JSON document with the ES XContentBuilder helper
 */
public XContentBuilder createJson4() throws Exception {
// Create the JSON object; this is one of several ways to build it
XContentBuilder source = XContentFactory.jsonBuilder()
.startObject()
.field("user", "kimchy")
.field("postDate", new Date())
.field("message", "trying to out ElasticSearch")
.endObject();
return source;
}
@Test
public void test1() throws Exception {
XContentBuilder source = createJson4();
// Index the JSON document
IndexResponse response = client.prepareIndex("twitter", "tweet", "1").setSource(source).get();
// Read the response metadata
String index = response.getIndex();
String type = response.getType();
String id = response.getId();
long version = response.getVersion();
boolean created = response.isCreated();
System.out.println(index + " : " + type + ": " + id + ": " + version + ": " + created);
}
/**
 * Get API: fetch a single document by id
 */
@Test
public void testGet() {
// GetResponse response = client.prepareGet("twitter", "tweet", "1")
// .get();
GetResponse response = client.prepareGet("log4j-2018.02.19", "log4j_type", "AWGvky2aWyh2zXt6_1Um")
.setOperationThreaded(false) // execute the operation on the calling thread instead of a separate thread
.get();
System.out.println(response.getSourceAsString());
}
@Test
public void testGet1(){
SearchResponse response = client.prepareSearch("log4j-2018.02.19")
.setTypes("log4j_type")
// Set the search type:
// 1. SearchType.DFS_QUERY_THEN_FETCH computes global term statistics first, giving more accurate scoring
// 2. SearchType.QUERY_THEN_FETCH is the default when no search type is set
.setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
// Set the query: match on the thread field
.setQuery(QueryBuilders.matchQuery("log4j_type.thread", "main"))
.addHighlightedField("main")
.setHighlighterPreTags("<em>")
.setHighlighterPostTags("</em>")
// Starting offset of the results (used for paging)
.setFrom(0)
// Maximum number of hits to return
.setSize(60)
// Also return an explanation of how each hit was scored
.setExplain(true)
// Execute and return the search response
.execute()
.actionGet();
SearchHits searchHits = response.getHits();
System.out.println("-----------------在["+"ma"+"]中搜索关键字["+"main"+"]---------------------");
System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!");
SearchHit[] hits = searchHits.getHits();
for (SearchHit searchHit : hits) {
// get the highlighted fields of this hit
Map<String, HighlightField> highlightFields = searchHit.getHighlightFields();
HighlightField highlightField = highlightFields.get("log4j_type.thread");
System.out.println("Highlighted field: " + highlightField.getName() + "\nHighlighted fragment: " + highlightField.getFragments()[0].string());
Map<String, Object> sourceAsMap = searchHit.sourceAsMap();
Set<String> keySet = sourceAsMap.keySet();
for (String string : keySet) {
// print each key/value pair of the source document
System.out.println(string+":"+sourceAsMap.get(string));
}
}
}
@Test
public void test4(){
// Phrase query on the twitter/tweet document indexed in test1
SearchRequestBuilder responsebuilder = client.prepareSearch("twitter").setTypes("tweet");
SearchResponse myresponse = responsebuilder.setQuery(QueryBuilders.matchPhraseQuery("user", "kimchy"))
.setFrom(0).setSize(10).setExplain(true).execute().actionGet();
SearchHits hits = myresponse.getHits();
for (int i = 0; i < hits.getHits().length; i++) {
System.out.println(hits.getHits()[i].getSourceAsString());
}
}
@Test
public void test5(){
BoolQueryBuilder boolBuilder = QueryBuilders.boolQuery();
// use should to express an OR-style query
boolBuilder.should(QueryBuilders.matchQuery("content","爱我"));
// build the search request
SearchRequestBuilder searchRequestBuilder = this.client.prepareSearch("hotcomments")
.setTypes("logs")
.setSearchType(SearchType.DFS_QUERY_THEN_FETCH) // search type: DFS_QUERY_THEN_FETCH computes global term statistics for more accurate scoring (QUERY_THEN_FETCH is the default)
.setQuery(boolBuilder)
.setSize(10);
// configure highlighting
searchRequestBuilder.setHighlighterPostTags("</span>");
searchRequestBuilder.setHighlighterPreTags("<span style=\"color:red\">");
searchRequestBuilder.addHighlightedField("content");
// execute the search
SearchResponse response = searchRequestBuilder.get();
// collect the results
List<Map<String,Object>> result = new ArrayList<>();
// iterate over the hits
for(SearchHit hit:response.getHits()){
Map<String, Object> source = hit.getSource();
// handle the highlight fragments
Map<String, HighlightField> highlightFields = hit.getHighlightFields();
HighlightField nameField = highlightFields.get("content");
if(nameField!=null){
Text[] fragments = nameField.fragments();
String nameTmp ="";
for(Text text:fragments){
nameTmp+=text;
}
// put the highlighted fragment back into the result
source.put("content",nameTmp);
}
result.add(source);
}
for (Map<String, Object> map : result) {
Set<Entry<String, Object>> entrySet = map.entrySet();
for (Entry<String, Object> entry : entrySet) {
System.out.println(entry.getKey()+":::"+entry.getValue());
}
}
}
@Test
public void test6() throws Exception{
// Paged search over the hotcomments index, with the matched comment content highlighted
PageBean<Map<String,Object>> searchListPageBean = new PageBean<Map<String,Object>>();
BoolQueryBuilder boolBuilder = QueryBuilders.boolQuery();
boolBuilder.must(QueryBuilders.matchQuery("content","那就这样吧"));
PageHelper.startPage(5, 3);
SearchRequestBuilder searchRequestBuilder = ESClientUtils.getClient().prepareSearch("hotcomments")
.setTypes("logs")
.setSearchType(SearchType.DFS_QUERY_THEN_FETCH) // search type: DFS_QUERY_THEN_FETCH computes global term statistics for more accurate scoring (QUERY_THEN_FETCH is the default)
.setQuery(boolBuilder)
.setFrom(5)
.setSize(3);
long totalHits = ESClientUtils.getClient().prepareSearch("hotcomments")
.setTypes("logs")
.setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
.setQuery(boolBuilder)
.get()
.getHits().getTotalHits();
// configure highlighting
searchRequestBuilder.setHighlighterPostTags("</span>");
searchRequestBuilder.setHighlighterPreTags("<span style=\"color:red\">");
searchRequestBuilder.addHighlightedField("content");
// execute the search
SearchResponse response = searchRequestBuilder.get();
// collect the results
List<Map<String,Object>> result = new ArrayList<>();
// iterate over the hits
for(SearchHit hit:response.getHits()){
Map<String, Object> source = hit.getSource();
// look up the song name and URL for this comment's songId via SearchUtils
String songName = null;
String songUrl = null;
Set<Entry<String, Object>> entrySet = source.entrySet();
for (Entry<String, Object> entry : entrySet) {
if(entry.getKey().equals("songId")){
Integer songId = (Integer)entry.getValue();
songName = SearchUtils.getSongNameById(songId);
songUrl = SearchUtils.getSongUrlById(songId);
}
}
source.put("songName", songName);
source.put("songUrl", songUrl);
// handle the highlight fragments
Map<String, HighlightField> highlightFields = hit.getHighlightFields();
HighlightField nameField = highlightFields.get("content");
if(nameField!=null){
Text[] fragments = nameField.fragments();
String nameTmp ="";
for(Text text:fragments){
nameTmp+=text;
}
// put the highlighted fragment back into the result
source.put("content",nameTmp);
}
result.add(source);
}
PageInfo<Map<String,Object>> pageInfo = new PageInfo<>(result);
long totalPage = (totalHits + 3 - 1) / 3;
int pageNum = pageInfo.getPageNum();
searchListPageBean.setPage(pageNum);
searchListPageBean.setTotalPage(totalPage);
searchListPageBean.setList(result);
for (Map<String, Object> map : result) {
Set<Entry<String, Object>> entrySet = map.entrySet();
for (Entry<String, Object> entry : entrySet) {
System.out.println(entry.getKey()+":::"+entry.getValue());
}
}
}
}
Project repository:
https://gitee.com/asoonis/feed-neo