每个消费者都会通过HeartbeatTask任务定时向GroupCoordinator发送HeartbeatRequest,告知GroupCoordinator自己正常在线。
HeartbeatRequest首先由KafkaApis.handleHeartbeatRequest方法进行处理,它负责验证权限、定义回调函数,并将请求委托给GroupCoordinator处理。

/**
 * Entry point for a heartbeat request: checks that the session may read the
 * consumer group, then hands the heartbeat to the group coordinator together
 * with a callback that enqueues the response on the request channel.
 *
 * @param request the incoming request whose body is cast to a HeartbeatRequest
 */
def handleHeartbeatRequest(request: RequestChannel.Request) {
  val heartbeat = request.body.asInstanceOf[HeartbeatRequest]
  val header = new ResponseHeader(request.header.correlationId)

  // Callback passed to the coordinator: builds a HeartbeatResponse from the
  // error code, traces it, and puts it on the request channel for sending.
  def sendResponseCallback(errorCode: Short): Unit = {
    val response = new HeartbeatResponse(errorCode)
    trace("Sending heartbeat response %s for correlation id %d to client %s."
      .format(response, request.header.correlationId, request.header.clientId))
    val send = new ResponseSend(request.connectionId, header, response)
    requestChannel.sendResponse(new RequestChannel.Response(request, send))
  }

  if (authorize(request.session, Read, new Resource(Group, heartbeat.groupId))) {
    // Authorized: let the coordinator handle the heartbeat and answer via the callback.
    coordinator.handleHeartbeat(
      heartbeat.groupId(),
      heartbeat.memberId(),
      heartbeat.groupGenerationId(),
      sendResponseCallback)
  } else {
    // Not authorized for this group: answer immediately, without involving the coordinator.
    val denied = new HeartbeatResponse(Errors.GROUP_AUTHORIZATION_FAILED.code)
    requestChannel.sendResponse(new Response(request, new ResponseSend(request.connectionId, header, denied)))
  }
}

GroupCoordinator.handleHeartbeat首先进行一系列的检测,保证GroupCoordinator处于可用状态且是对应消费者组的管理者。之后检测Consumer Group状态、memberId、generationId是否合法。最后调用completeAndScheduleNextHeartbeatExpiration方法。

/**
 * Validates a heartbeat and reports the outcome through `responseCallback`.
 *
 * The checks run in this order: coordinator active, coordinator owns the
 * group, group not still loading, group metadata present, and finally (under
 * the group's lock) group state, member id and generation id. Only when every
 * check passes is the member's heartbeat expiration rescheduled.
 *
 * @param groupId          id of the consumer group the heartbeat is for
 * @param memberId         id of the member sending the heartbeat
 * @param generationId     generation the member believes the group is in
 * @param responseCallback invoked exactly once with the resulting error code
 */
def handleHeartbeat(groupId: String,
                    memberId: String,
                    generationId: Int,
                    responseCallback: Short => Unit) {
  if (!isActive.get) {
    responseCallback(Errors.GROUP_COORDINATOR_NOT_AVAILABLE.code)
  } else if (!isCoordinatorForGroup(groupId)) {
    // this coordinator does not manage the given consumer group
    responseCallback(Errors.NOT_COORDINATOR_FOR_GROUP.code)
  } else if (isCoordinatorLoadingInProgress(groupId)) {
    // the group is still loading, so respond just blindly
    responseCallback(Errors.NONE.code)
  } else {
    Option(groupManager.getGroup(groupId)) match {
      case None =>
        // no metadata is registered for this group
        responseCallback(Errors.UNKNOWN_MEMBER_ID.code)

      case Some(group) =>
        group synchronized {
          val errorCode =
            if (group.is(Dead)) {
              // if the group is marked as dead, it means some other thread has just removed the group
              // from the coordinator metadata; this is likely that the group has migrated to some other
              // coordinator OR the group is in a transient unstable phase. Let the member retry
              // joining without the specified member id,
              Errors.UNKNOWN_MEMBER_ID.code
            } else if (!group.is(Stable)) {
              // any state other than Stable is reported as a rebalance in progress
              Errors.REBALANCE_IN_PROGRESS.code
            } else if (!group.has(memberId)) {
              // the member id is not part of this group
              Errors.UNKNOWN_MEMBER_ID.code
            } else if (generationId != group.generationId) {
              // the member's generation does not match the group's current generation
              Errors.ILLEGAL_GENERATION.code
            } else {
              // valid heartbeat: reschedule the expiration before answering
              completeAndScheduleNextHeartbeatExpiration(group, group.get(memberId))
              Errors.NONE.code
            }
          // the callback is still invoked while holding the group lock
          responseCallback(errorCode)
        }
    }
  }
}

在completeAndScheduleNextHeartbeatExpiration中,会更新收到这个member心跳的时间戳,尝试完成相应的DelayedHeartbeat,并创建新的DelayedHeartbeat对象放入heartbeatPurgatory中,等待下次心跳到来或者DelayedHeartbeat超时。

/**
 * Records a heartbeat for `member` and arms the next expiration.
 *
 * Stamps the member with the current time, asks the heartbeat purgatory to
 * check-and-complete whatever is watched under the member's key, then
 * registers a new DelayedHeartbeat whose deadline is the new timestamp plus
 * the member's session timeout.
 *
 * @param group  the group the member belongs to
 * @param member the member whose heartbeat was just received
 */
private def completeAndScheduleNextHeartbeatExpiration(group: GroupMetadata, member: MemberMetadata) {
  // complete current heartbeat expectation: refresh the timestamp first so the
  // pending operation watched under this key sees the new value
  member.latestHeartbeat = time.milliseconds()
  val watchKey = MemberKey(member.groupId, member.memberId)
  heartbeatPurgatory.checkAndComplete(watchKey)

  // reschedule the next heartbeat expiration deadline
  val nextDeadline = member.latestHeartbeat + member.sessionTimeoutMs
  heartbeatPurgatory.tryCompleteElseWatch(
    new DelayedHeartbeat(this, group, member, nextDeadline, member.sessionTimeoutMs),
    Seq(watchKey))
}

下面我们看看DelayedHeartbeat的实现,主要字段有:

/**
 * Delayed operation tracking one member's pending heartbeat. Every callback
 * simply delegates to the corresponding method on the GroupCoordinator.
 *
 * @param coordinator       coordinator that implements the completion/expiration logic
 * @param group             metadata of the group the member belongs to
 * @param member            metadata of the member being tracked
 * @param heartbeatDeadline deadline timestamp forwarded to the coordinator's checks
 * @param sessionTimeout    value handed to DelayedOperation (presumably its delay in ms; confirm against DelayedOperation)
 */
private[coordinator] class DelayedHeartbeat(
    coordinator: GroupCoordinator,
    group: GroupMetadata,
    member: MemberMetadata,
    heartbeatDeadline: Long,
    sessionTimeout: Long)
  extends DelayedOperation(sessionTimeout) {

  // Passes forceComplete along so the coordinator decides whether to complete this operation.
  override def tryComplete(): Boolean =
    coordinator.tryCompleteHeartbeat(group, member, heartbeatDeadline, forceComplete)

  override def onExpiration(): Unit =
    coordinator.onExpireHeartbeat(group, member, heartbeatDeadline)

  override def onComplete(): Unit =
    coordinator.onCompleteHeartbeat()
}

tryCompleteHeartbeat会检测下列四个条件,如果满足其中任意一个,就认为DelayedHeartbeat符合执行条件:

/**
 * Decides, under the group's lock, whether a delayed heartbeat can be completed.
 *
 * The operation is force-completed when the member should be kept alive
 * (see shouldKeepMemberAlive) or when the member is leaving the group.
 *
 * @param group             group whose monitor guards the check
 * @param member            member the delayed heartbeat belongs to
 * @param heartbeatDeadline deadline the delayed heartbeat was created with
 * @param forceComplete     completion hook of the delayed operation
 * @return the result of `forceComplete()` when a condition holds, otherwise false
 */
def tryCompleteHeartbeat(group: GroupMetadata, member: MemberMetadata, heartbeatDeadline: Long, forceComplete: () => Boolean) = {
  group synchronized {
    val completable = shouldKeepMemberAlive(member, heartbeatDeadline) || member.isLeaving
    // forceComplete is only invoked when one of the conditions holds
    completable && forceComplete()
  }
}
  
  /**
   * Tells whether `member` must not be treated as failed with respect to
   * `heartbeatDeadline`. Conditions are evaluated lazily, left to right.
   */
  private def shouldKeepMemberAlive(member: MemberMetadata, heartbeatDeadline: Long) = {
    // a join callback is registered, i.e. the member is waiting for its JoinGroup response
    def awaitingJoin = member.awaitingJoinCallback != null
    // a sync callback is registered, i.e. the member is waiting for its SyncGroup response
    def awaitingSync = member.awaitingSyncCallback != null
    // the deadline implied by the latest heartbeat lies beyond the one being checked,
    // i.e. the recorded heartbeat timestamp is newer than the one this deadline was built from
    def heartbeatRenewed = member.latestHeartbeat + member.sessionTimeoutMs > heartbeatDeadline

    awaitingJoin || awaitingSync || heartbeatRenewed
  }

  /** Whether the group manager reports `groupId` as local to this coordinator. */
  private def isCoordinatorForGroup(groupId: String): Boolean =
    groupManager.isGroupLocal(groupId)

  /** Whether the group manager reports `groupId` as still being loaded. */
  private def isCoordinatorLoadingInProgress(groupId: String): Boolean =
    groupManager.isGroupLoading(groupId)
}

onCompleteHeartbeat是空实现,所以DelayedHeartbeat执行之后仅会从heartbeatPurgatory中删除,并不会执行其他操作。
DelayedHeartbeat到期时会调用GroupCoordinator.onExpireHeartbeat方法,它会将其对应的member从GroupMetadata中删除,并按照当前GroupMetadata所处的状态进行分类处理。

/**
 * Expiration handler for a delayed heartbeat. Under the group's lock it
 * re-checks whether the member should be kept alive and, if not, runs the
 * member-failure handling.
 *
 * @param group             group whose monitor guards the check
 * @param member            member whose delayed heartbeat expired
 * @param heartbeatDeadline deadline the expired delayed heartbeat was created with
 */
def onExpireHeartbeat(group: GroupMetadata, member: MemberMetadata, heartbeatDeadline: Long) {
  group synchronized {
    // re-evaluate at expiration time: the member's state may have changed since scheduling
    val keepAlive = shouldKeepMemberAlive(member, heartbeatDeadline)
    if (!keepAlive) {
      onMemberFailure(group, member)
    }
  }
}
  
  /**
   * Handles a failed member: removes it from the group, then reacts according
   * to the state the group is currently in.
   */
  private def onMemberFailure(group: GroupMetadata, member: MemberMetadata) {
    trace("Member %s in group %s has failed".format(member.memberId, group.groupId))
    // the member is dropped from the group metadata before the state is inspected
    group.remove(member.memberId)
    group.currentState match {
      // a rebalance is already being prepared: with one member fewer, the pending
      // join operation for this group may now be completable, so check it
      case PreparingRebalance => joinPurgatory.checkAndComplete(GroupKey(group.groupId))
      // the existing assignment may no longer be valid, so consider preparing a rebalance
      case Stable | AwaitingSync => maybePrepareRebalance(group)
      // nothing further to do for a dead group
      case Dead =>
    }
  }