订阅推送架构：消息订阅和推送

转载

智慧编织者 2023-07-30 15:55:22

文章标签 订阅推送架构 golang 大数据客户端推送消息 文章分类 架构后端开发

写在前面：消息订阅与推送都通过nsq的tcp服务实现。

关于消息的推送最重要的是两个文件：nsqd/protocol_v2.go和nsqd/client_v2.go。

当一个客户端与nsqd进程建立了一个tcp连接时，会调用protocolV2.IOLoop方法，并新建一个clientV2结构体对象。IOLoop方法会启动一个协程执行messagePump方法。因此，对于每一个tcp连接，都会有两个协程：运行IOLoop的协程用于接收客户端的请求；运行messagePump的协程负责处理数据，并通过clientV2把消息推送给客户端。

下面具体来讲

首先需要客户端订阅消息，会调用“SUB”接口，最终会到nsqd/protocol_v2.go中的SUB函数，函数逻辑如下

// SUB handles the "SUB" command: it validates the request, registers the
// client on the requested topic/channel and hands the channel to the client's
// message pump through client.SubEventChan.
//
// params[1] is the topic name and params[2] the channel name (params[0] is
// the command name and is not read here). On success the shared okBytes
// response is returned. State, heartbeat, parameter-count, name-format and
// consumer-limit failures are returned as fatal client errors; an error from
// CheckAuth is returned unchanged.
func (p *protocolV2) SUB(client *clientV2, params [][]byte) ([]byte, error) {
	// Subscribing is only allowed while the client is still in its initial state.
	if state := atomic.LoadInt32(&client.State); state != stateInit {
		return nil, protocol.NewFatalClientErr(nil, "E_INVALID", "cannot SUB in current state")
	}

	// A positive heartbeat interval is required to subscribe.
	if client.HeartbeatInterval <= 0 {
		return nil, protocol.NewFatalClientErr(nil, "E_INVALID", "cannot SUB with heartbeats disabled")
	}

	// At least three parameters are needed: command, topic name, channel name.
	if len(params) < 3 {
		return nil, protocol.NewFatalClientErr(nil, "E_INVALID", "SUB insufficient number of parameters")
	}

	// Validate the topic name.
	topicName := string(params[1])
	if !protocol.IsValidTopicName(topicName) {
		desc := fmt.Sprintf("SUB topic name %q is not valid", topicName)
		return nil, protocol.NewFatalClientErr(nil, "E_BAD_TOPIC", desc)
	}

	// Validate the channel name.
	channelName := string(params[2])
	if !protocol.IsValidChannelName(channelName) {
		desc := fmt.Sprintf("SUB channel name %q is not valid", channelName)
		return nil, protocol.NewFatalClientErr(nil, "E_BAD_CHANNEL", desc)
	}

	// Authorization check for this topic/channel pair.
	if authErr := p.CheckAuth(client, "SUB", topicName, channelName); authErr != nil {
		return nil, authErr
	}

	// Retry until the client has been added to a topic/channel that is not an
	// ephemeral one in the middle of exiting; otherwise the client would be
	// attached to something that is about to disappear.
	var channel *Channel
	for {
		topic := p.ctx.nsqd.GetTopic(topicName)
		channel = topic.GetChannel(channelName)

		// AddClient failing is reported as the consumer limit being exceeded.
		if addErr := channel.AddClient(client.ID, client); addErr != nil {
			limit := p.ctx.nsqd.getOpts().MaxChannelConsumers
			desc := fmt.Sprintf("channel consumers for %s:%s exceeds limit of %d",
				topicName, channelName, limit)
			return nil, protocol.NewFatalClientErr(nil, "E_TOO_MANY_CHANNEL_CONSUMERS", desc)
		}

		goingAway := (channel.ephemeral && channel.Exiting()) || (topic.ephemeral && topic.Exiting())
		if !goingAway {
			break
		}

		// Undo the registration, back off briefly and try again.
		channel.RemoveClient(client.ID)
		time.Sleep(1 * time.Millisecond)
	}

	// Mark the client as subscribed and remember its channel.
	atomic.StoreInt32(&client.State, stateSubscribed)
	client.Channel = channel
	// Notify the message pump so that it starts delivering from this channel.
	client.SubEventChan <- channel

	return okBytes, nil
}

然后到messagePump中

// messagePump is the delivery loop for one client connection. Once the client
// has subscribed (a channel arrives on subEventChan) and reports that it is
// ready, the loop reads messages from the subscribed channel's in-memory and
// backend queues, registers each one as in flight and sends it to the client.
// It runs until the client's ExitChan fires or a flush/send fails.
//
// This is an excerpt: the "..." lines mark code that was elided, including the
// declarations of several variables used below (err, subChannel, flushed,
// sampleRate, msgTimeout, the tickers, ...).
func (p *protocolV2) messagePump(client *clientV2, startedChan chan bool) {
    ...
    // Keep a local handle on the client's SubEventChan so that it can be
    // set to nil later without touching the client's own field.
	subEventChan := client.SubEventChan
	...

	for {
		if subChannel == nil || !client.IsReadyForMessages() {
			// the client is not ready to receive messages...
			// Nil channels never become ready in a select, so no message
			// is received from either queue during this iteration.
			memoryMsgChan = nil
			backendMsgChan = nil
			flusherChan = nil
			// force flush
			client.writeLock.Lock()
			err = client.Flush()
			client.writeLock.Unlock()
			if err != nil {
				goto exit
			}
			flushed = true
		} else if flushed {
			// last iteration we flushed...
			// do not select on the flusher ticker channel
			memoryMsgChan = subChannel.memoryMsgChan
			backendMsgChan = subChannel.backend.ReadChan()
			flusherChan = nil
		} else {
			// we're buffered (if there isn't any more data we should flush)...
			// select on the flusher ticker channel, too
			// These channels are the sources of messages to receive and push.
			memoryMsgChan = subChannel.memoryMsgChan
			backendMsgChan = subChannel.backend.ReadChan()
			flusherChan = outputBufferTicker.C
		}

		select {
        ...
		case subChannel = <-subEventChan:
			// Setting subEventChan to nil disables this case from now on,
			// so a client cannot subscribe a second time.
			subEventChan = nil
		...
		case b := <-backendMsgChan:
		    // Start pushing a subscribed message read from the backend queue.
			// With sampling enabled, the message is skipped when the random
			// draw in [0, 100) exceeds sampleRate.
			if sampleRate > 0 && rand.Int31n(100) > sampleRate {
				continue
			}

			// Backend messages arrive as raw bytes and must be decoded;
			// a message that fails to decode is logged and dropped.
			msg, err := decodeMessage(b)
			if err != nil {
				p.ctx.nsqd.logf(LOG_ERROR, "failed to decode message - %s", err)
				continue
			}
			msg.Attempts++
            // Stamp the message and record it as in flight on the channel.
			// NOTE(review): the error returned by StartInFlightTimeout is discarded here.
			subChannel.StartInFlightTimeout(msg, client.ID, msgTimeout)
			// Update the client's sending bookkeeping (presumably bumps InFlightCount
			// and MessageCount; SendingMessage is not shown here, confirm).
			client.SendingMessage()
			// Send the message to the client.
			// NOTE(review): err here is the case-local variable declared by := above,
			// so the error logging at exit, which reads the outer err, will not see a
			// failure from this path. Confirm whether that is intended.
			err = p.SendMessage(client, msg)
			if err != nil {
				goto exit
			}
			// Data was written since the last flush.
			flushed = false
		case msg := <-memoryMsgChan:
			// Same flow as the backend case, except that the message is
			// already decoded.
			if sampleRate > 0 && rand.Int31n(100) > sampleRate {
				continue
			}
			msg.Attempts++

			// NOTE(review): the error returned by StartInFlightTimeout is discarded here.
			subChannel.StartInFlightTimeout(msg, client.ID, msgTimeout)
			client.SendingMessage()
			err = p.SendMessage(client, msg)
			if err != nil {
				goto exit
			}
			flushed = false
		case <-client.ExitChan:
			goto exit
		}
	}

exit:
	// Log the shutdown, stop both tickers and report the error, if any,
	// that ended the loop.
	p.ctx.nsqd.logf(LOG_INFO, "PROTOCOL(V2): [%s] exiting messagePump", client)
	heartbeatTicker.Stop()
	outputBufferTicker.Stop()
	if err != nil {
		p.ctx.nsqd.logf(LOG_ERROR, "PROTOCOL(V2): [%s] messagePump error - %s", client, err)
	}
}

推送消息时会先调用StartInFlightTimeout，然后设置一些标志位，用于判断是否可以继续推送消息。

下面看一下StartInFlightTimeout做了什么

// StartInFlightTimeout stamps msg with its delivery bookkeeping and registers
// it as in flight on the channel.
//
// It records clientID and the current time on the message and sets msg.pri to
// the UnixNano timestamp at which timeout elapses. The message is then added
// to the in-flight map via pushInFlightMessage and, only if that succeeds, to
// the in-flight priority queue via addToInFlightPQ. An error from
// pushInFlightMessage is returned unchanged; nil is returned otherwise.
func (c *Channel) StartInFlightTimeout(msg *Message, clientID int64, timeout time.Duration) error {
	deliveredAt := time.Now()
	deadline := deliveredAt.Add(timeout)

	msg.clientID = clientID
	msg.deliveryTS = deliveredAt
	msg.pri = deadline.UnixNano()

	if err := c.pushInFlightMessage(msg); err != nil {
		return err
	}

	// Queue of messages that were sent to a client but not yet acknowledged
	// (presumably ordered by msg.pri, i.e. the timeout deadline;
	// addToInFlightPQ is not shown here).
	c.addToInFlightPQ(msg)
	return nil
}

// pushInFlightMessage adds msg to the channel's in-flight map under
// inFlightMutex, keyed by msg.ID. It returns an error, leaving the map
// untouched, when a message with the same ID is already in flight.
func (c *Channel) pushInFlightMessage(msg *Message) error {
	c.inFlightMutex.Lock()
	_, duplicate := c.inFlightMessages[msg.ID]
	if !duplicate {
		c.inFlightMessages[msg.ID] = msg
	}
	c.inFlightMutex.Unlock()

	if duplicate {
		return errors.New("ID already in flight")
	}
	return nil
}

消息推送到此结束。消息推送完成后，消费端在消费完消息时需要调用“FIN”接口，表示此消息已经消费完，逻辑如下：

// FIN handles the "FIN" command, with which a consumer acknowledges that it
// has finished processing a message.
//
// params[1] carries the message ID (params[0] is the command name and is not
// read here). State, parameter-count and ID-parsing failures are returned as
// fatal client errors; a failure of FinishMessage is reported through
// protocol.NewClientErr with code E_FIN_FAILED. On success both return values
// are nil.
func (p *protocolV2) FIN(client *clientV2, params [][]byte) ([]byte, error) {
	// FIN is only accepted from a client that is subscribed or closing.
	state := atomic.LoadInt32(&client.State)
	canFinish := state == stateSubscribed || state == stateClosing
	if !canFinish {
		return nil, protocol.NewFatalClientErr(nil, "E_INVALID", "cannot FIN in current state")
	}

	// At least two parameters are needed: command and message ID.
	if len(params) < 2 {
		return nil, protocol.NewFatalClientErr(nil, "E_INVALID", "FIN insufficient number of params")
	}

	// Parse the message ID; getMessageID yields a pointer to it.
	id, err := getMessageID(params[1])
	if err != nil {
		return nil, protocol.NewFatalClientErr(nil, "E_INVALID", err.Error())
	}

	// Counterpart of StartInFlightTimeout: tell the channel that this client
	// is done with the message.
	if finErr := client.Channel.FinishMessage(client.ID, *id); finErr != nil {
		desc := fmt.Sprintf("FIN %s failed %s", *id, finErr.Error())
		return nil, protocol.NewClientErr(finErr, "E_FIN_FAILED", desc)
	}

	// Update the client's bookkeeping for a finished message (presumably
	// adjusts FinishCount/InFlightCount and wakes messagePump;
	// FinishedMessage is not shown here, confirm).
	client.FinishedMessage()

	return nil, nil
}