需求
文章,最开始文章详情需要显示点赞数量、访问数量,以前做法是在调用查询接口 数据库+1 点赞时候访问量+1
update question q set q.view_count=q.view_count+1 where id=1 类似这样做法,其实在高并发场景不合理的,但是还好
需求改变
需要支持时间搜索 搜索某一段时间的访问量
我的方案
参考《redis-缓存设计-统计1秒 5秒 1分钟 访问数量》
比如我设计1分钟延迟,同一分钟时间片都是redis incr 指定字段时间片,然后时间片过了在刷到数据库,因为不支持时分秒搜索 那么一个文章就算一天点赞10万次上亿次一天就生成一条数据 后续都是update 不存在埋点数据过大问题
数据库表
--文章统计指标
CREATE TABLE `question_metric_item`
(
`id` BIGINT(20) NOT NULL AUTO_INCREMENT COMMENT 'ID',
`question_id` BIGINT(20) NOT NULL COMMENT '文章id',
`answer_count` int(11) default 0 COMMENT '评论量',
`subscription_count` int(11) default 0 COMMENT '关注量',
`help_count` int(11) default 0 COMMENT '有帮助',
`comment_count` int(11) default 0 COMMENT '评论回答数',
`no_help_count` int(11) default 0 COMMENT '无帮助',
`share_count` int(11) default 0 COMMENT '分享数',
`view_count` int(11) default 0 COMMENT '查看数',
`created_at` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`provider_id` BIGINT(20) COMMENT '服务商id',
PRIMARY KEY (`id`),
) ENGINE = InnoDB
AUTO_INCREMENT = 1
DEFAULT CHARSET = utf8
ROW_FORMAT = DYNAMIC COMMENT ='文章统计指标';
埋点实现
在对应地方调用对应方法埋点
/**
* 新增阅读量
*
* @param questionId
* @return
*/
public Boolean addViewCount(Long providerId, Long questionId, Date date, Long count) {
if (questionId == null) {
return false;
}
addCount(providerId, QuestionMetricItemRo_.viewCount, questionId, count, date);
return true;
}
/**
* 新增评论量
*
* @param questionId
* @param count
* @return
*/
public Boolean addAnswerCount(Long providerId, Long questionId, Date date, Long count) {
addCount(providerId, QuestionMetricItemRo_.answerCount, questionId, count, date);
return true;
}
/**
* 关注与取消关注
*
* @param questionId
* @param date
* @param count
* @return
*/
public Boolean addSubscriptionCount(Long providerId, Long questionId, Date date, Long count) {
addCount(providerId, QuestionMetricItemRo_.subscriptionCount, questionId, count, date);
return true;
}
/**
* 新增回答量
*
* @param questionId
* @param date
* @param count
* @return
*/
public Boolean addCommentCount(Long providerId, Long questionId, Date date, Long count) {
addCount(providerId, QuestionMetricItemRo_.commentCount, questionId, count, date);
return true;
}
/**
* 有帮助
*
* @param questionId
* @param date
* @param count
* @return
*/
public Boolean addHelpCount(Long providerId, Long questionId, Date date, Long count) {
addCount(providerId, QuestionMetricItemRo_.helpCount, questionId, count, date);
return true;
}
/**
* 无帮助
*
* @param questionId
* @param date
* @param count
* @return
*/
public Boolean addNoHelpCount(Long providerId, Long questionId, Date date, Long count) {
addCount(providerId, QuestionMetricItemRo_.noHelpCount, questionId, count, date);
return true;
}
/**
* 更新分享数
*
* @param questionId
* @param date
* @param count
* @return
*/
public Boolean addShareCount(Long providerId, Long questionId, Date date, Long count) {
addCount(providerId, QuestionMetricItemRo_.shareCount, questionId, count, date);
return true;
}
public void addCount(Long providerId, String field, Long questionId, Long count, Date currentDate) {
if (log.isDebugEnabled()) {
log.debug("field={},questionId={},count={}", field, questionId, count);
}
String id = formatId(currentDate, questionId);
String redisIdKey = this.getRoPrimaryId(id);
//添加到当日list集合
String listKey = getRoPrefix("list");
Date formatDate = formatDate(currentDate);
//时间片过了则统计
Date scoreDate = Time.when(formatDate).setMinute(Time.when(formatDate).getMinute() + PREISION_MINUTE).setSecond(5).getDate();
if (!exists(redisIdKey)) {
Map<String, String> values = MapUtil.newMap(QuestionMetricItemRo_.createdAt, Time.when(currentDate).toString(Time.DEFAULT_TIME_FORMATS[0]),
QuestionMetricItemRo_.id, id, QuestionMetricItemRo_.questionId, String.valueOf(questionId), QuestionMetricItemRo_.providerId, String.valueOf(providerId));
this.hset(redisIdKey, values);
//定时刷入缓存
this.zadd(listKey, NumberUtil.toDouble(scoreDate.getTime()), id);
//设置过期时间
expire(Sets.newHashSet(redisIdKey, listKey), getRo().expireSeconds());
}
//针对文章数量累加1
this.hincrBy(redisIdKey, field, count);
}
public Date formatDate(Date date) {
int preision = 60000 * PREISION_MINUTE;
//算出x分钟内的时间片
long startDateTime = (long) (date.getTime() / preision) * preision;
return new Date(startDateTime);
}
定时任务同步
当然同步的时候除了同步每日的,还需要往主表的Ro进行数量增加
/**
* 同步redis的统计数量到数据库
*/
@Override
public void syncByRedis() throws ParseException {
int offset = 0;
int size = 50;
//队列待消费数量越多 则每次最多偏移200
Date currentDate = new Date();
Long waitCount = questionMetricItemRedisDao.listCount(currentDate);
while (true) {
Set<String> ids = questionMetricItemRedisDao.listKey(currentDate, offset, size);
if (CollectionUtils.isEmpty(ids)) {
log.debug("[QuestionMetricItemSync]没有数据忽略,offset:{},count:{}", offset, waitCount);
break;
}
EweiTLogHandler eweiTLogHandler = new EweiTLogHandler();
try {
eweiTLogHandler.before(UUID.randomUUID().toString().replace("-", ""));
//score到当前时间的数据信息 实现延迟效果
List<QuestionMetricItemRo> questionMetricItemRos = questionMetricItemRedisDao.listById(ids);
log.info("[QuestionMetricItemSync]执行同步,offset:{},count:{},Ids:{}", offset, waitCount, JSON.toJSONString(ids));
log.info("执行同步questionMetricItems={}", JSON.toJSONString(questionMetricItemRos));
List<QuestionMetricItem> questionMetricItems = QuestionMetricItemConvert.INSTANCE.toQuestionMetricItem(questionMetricItemRos);
SpringUtil.getBean(IQuestionMetricItemService.class).batchSaveAndSyncQuestion(questionMetricItems);
} catch (Exception e) {
//埋点失败也不影响后续执行
log.error("[QuestionMetricItemSync]执行同步异常", e);
} finally {
questionMetricItemRedisDao.deleteByIdAndDelRouting(ids);
eweiTLogHandler.clear();
}
}
}
针对实时性
我们知道统计是根据我们的时间片有一定延迟的,针对报表有一定延迟是可以接收的,但是针对详情需要实时性
不可能sum我们分日期的统计,直接用主表又会有延迟,很简单 通过主表的数量+当前时间片未同步的数量就好了
如以下代码
/**
* 合并当前时间片还未同步到redis的count
*
* @param
* @return
*/
public void combineCount(Question question) {
if (question == null) {
return;
}
String formatId = formatId(new Date(), NumberUtil.toLong(question.getId()));
QuestionMetricItemRo itemRo = findById(formatId);
if (itemRo == null) {
return;
}
Integer answerCount = NumberUtil.toInteger(question.getAnswerCount(), 0) + (NumberUtil.toInteger(question.getAnswerCount(), 0));
question.setAnswerCount(answerCount);
Integer subscriptionCount = NumberUtil.toInteger(question.getSubscriptionCount(), 0) + (NumberUtil.toInteger(itemRo.getSubscriptionCount(), 0));
question.setSubscriptionCount(subscriptionCount);
Integer commentCount = NumberUtil.toInteger(question.getCommentCount(), 0) + (NumberUtil.toInteger(itemRo.getCommentCount(), 0));
question.setCommentCount(commentCount);
Integer helpCount = NumberUtil.toInteger(question.getVoteCount(), 0) + (NumberUtil.toInteger(itemRo.getHelpCount(), 0));
question.setVoteCount(helpCount);
Integer noHelpCount = NumberUtil.toInteger(question.getNoHelpCount(), 0) + (NumberUtil.toInteger(itemRo.getNoHelpCount(), 0));
question.setNoHelpCount(noHelpCount);
Integer shareCount = NumberUtil.toInteger(question.getShareCount(), 0) + (NumberUtil.toInteger(itemRo.getShareCount(), 0));
question.setShareCount(shareCount);
Integer viewCount = NumberUtil.toInteger(question.getViewCount(), 0) + (NumberUtil.toInteger(itemRo.getViewCount(), 0));
question.setViewCount(viewCount);
}
针对时间统计sql
SELECT DATE_FORMAT(m.created_at, '%Y-%m-%d') AS created_at,SUM(m.answer_count) AS answer_count,SUM(m.subscription_count) AS subscription_count,SUM(m.help_count) AS help_count,SUM(m.comment_count) AS comment_count,SUM(m.no_help_count) AS no_help_count,SUM(m.share_count) AS share_count,SUM(m.view_count) AS view_count FROM question_metric_item m
JOIN question q ON m.question_id=q.id
WHERE m.provider_id=#{providerId} AND m.created_at>=#{startDate} AND m.created_at<=#{endDate} and (q.created_at>=#{parameter1} and q.updated_at>=#{parameter2}) and (q.updater_id=#{parameter3} or q.author_id=#{parameter4}) AND q.topic_id NOT IN(#{notInTopicIds}) GROUP BY DATE_FORMAT(m.created_at, '%Y-%m-%d')
针对列表统计sql
1.先查满足条件的文章 如果需要根据各个指标排序,则我们主表也冗余了一个总数,可以orderby 文章上面的总数 实现排序
SELECT * FROM question q WHERE q.provider_id=#{providerId} AND q.deleted=0 and (q.created_at>=#{parameter1} and q.updated_at>=#{parameter2}) and (q.updater_id=#{parameter3} or q.author_id=#{parameter4}) AND q.topic_id NOT IN(#{notInTopicIds})
2.再根据文章去统计
SELECT m.question_id, SUM(m.answer_count) AS answer_count,SUM(m.subscription_count) AS subscription_count,SUM(m.help_count) AS help_count,SUM(m.comment_count) AS comment_count,SUM(m.no_help_count) AS no_help_count,SUM(m.share_count) AS share_count,SUM(m.view_count) AS view_count FROM question_metric_item m
WHERE m.created_at>=#{startDate} AND m.created_at<=#{endDate} AND m.question_id in(#{questionIds}) AND m.provider_id=#{providerId} GROUP BY m.question_id
针对汇总统计
SELECT SUM(m.answer_count) AS answer_count,SUM(m.subscription_count) AS subscription_count,SUM(m.help_count) AS help_count,SUM(m.comment_count) AS comment_count,SUM(m.no_help_count) AS no_help_count,SUM(m.share_count) AS share_count,SUM(m.view_count) AS view_count FROM question_metric_item m
JOIN question q ON m.question_id=q.id
WHERE m.provider_id=#{providerId} AND m.created_at>=#{startDate} AND m.created_at<=#{endDate} AND q.deleted=0 and (q.created_at>=#{parameter1} and q.updated_at>=#{parameter2}) and (q.updater_id=#{parameter3} or q.author_id=#{parameter4}) AND q.topic_id NOT IN(#{notInTopicIds})
索引
provider_id created_at加组合索引