FFmpeg使用av_read_frame()读取音频流、视频流、字幕流的数据,得到AVPacket数据包。FFmpeg官方的doc/examples目录中有使用示例,也可以参考ffplay.c:打开文件/网络流后,在while循环中调用av_read_frame()读取数据包,也就是解封装(demux)过程,直到文件末尾(EOF)。

av_read_frame()的调用链如下图所示:

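(原调用链图片缺失,按下文内容整理)av_read_frame() → avpriv_packet_list_get() / read_frame_internal();read_frame_internal() → ff_read_packet() → AVInputFormat的read_packet()(例如mov_read_packet()),以及 parse_packet() → av_parser_parse2()。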

1、av_read_frame声明

av_read_frame方法的声明位于libavformat/avformat.h,如下所示:

/**
 * Return the next frame of a stream.
 * This function returns what is stored in the file, and does not validate
 * that what is there are valid frames for the decoder. It will split what is
 * stored in the file into frames and return one for each call. It will not
 * omit invalid data between valid frames so as to give the decoder the maximum
 * information possible for decoding.
 *
 * On success, the returned packet is reference-counted (pkt->buf is set) and
 * valid indefinitely. The packet must be freed with av_packet_unref() when
 * it is no longer needed. For video, the packet contains exactly one frame.
 * For audio, it contains an integer number of frames if each frame has
 * a known fixed size (e.g. PCM or ADPCM data). If the audio frames have
 * a variable size (e.g. MPEG audio), then it contains one frame.
 *
 * pkt->pts, pkt->dts and pkt->duration are always set to correct
 * values in AVStream.time_base units (and guessed if the format cannot
 * provide them). pkt->pts can be AV_NOPTS_VALUE if the video format
 * has B-frames, so it is better to rely on pkt->dts if you do not
 * decompress the payload.
 *
 * @return 0 if OK, < 0 on error or end of file. On error, pkt will be blank
 *         (as if it came from av_packet_alloc()).
 *
 * @note pkt will be initialized, so it may be uninitialized, but it must not
 *       contain data that needs to be freed.
 */
int av_read_frame(AVFormatContext *s, AVPacket *pkt);

中文大意为:该方法返回文件中存储的数据,但不会校验这些数据对解码器而言是否为有效帧。它把文件中存储的内容拆分为帧,每次调用返回一帧;为了给解码器提供尽可能多的信息,有效帧之间的无效数据不会被丢弃。

成功时,返回的数据包是引用计数的(pkt->buf已设置),并且一直有效。不再需要该packet时,必须使用av_packet_unref()进行释放。对于视频,每个packet只包含一帧。对于音频,如果每帧大小固定且已知(比如PCM或ADPCM),packet包含整数个帧;如果帧大小可变(比如MPEG音频),则只包含一帧。

pkt->pts、pkt->dts、pkt->duration以AVStream.time_base为单位,总是被设为正确的值(如果封装格式无法提供,则由FFmpeg推测)。如果视频中存在B帧,pkt->pts可能为AV_NOPTS_VALUE,所以在不解码数据的情况下,最好依赖pkt->dts。

返回0代表成功,小于0代表错误或文件结束。出错时,pkt是空白的(如同刚由av_packet_alloc()分配得到)。

2、av_read_frame的实现

av_read_frame()方法主要调用avpriv_packet_list_get()和read_frame_internal(),分为未设置genpts与设置了genpts两种情况进行读取:

/*
 * Excerpt of av_read_frame(): hand the next demuxed packet to the caller.
 * The code following the return_packet label is elided in this listing.
 *
 * - AVFMT_FLAG_GENPTS not set: take a packet from s->internal->packet_buffer
 *   if that list is non-empty, otherwise call read_frame_internal().
 * - AVFMT_FLAG_GENPTS set: newly read packets are appended to packet_buffer
 *   and the head packet is only handed out once the condition further below
 *   allows it (pts known, dts unknown, stream fully discarded, or eof).
 *
 * Negative results of read_frame_internal() are returned directly, except
 * that in the genpts path a failure other than AVERROR(EAGAIN) with packets
 * still buffered sets eof and drains the buffer first.
 */
int av_read_frame(AVFormatContext *s, AVPacket *pkt)
{
    const int genpts = s->flags & AVFMT_FLAG_GENPTS;
    int eof = 0;
    int ret;
    AVStream *st;
    // genpts is not set: fetch a single packet directly
    if (!genpts) {
        ret = s->internal->packet_buffer
              ? avpriv_packet_list_get(&s->internal->packet_buffer,
                                        &s->internal->packet_buffer_end, pkt)
              : read_frame_internal(s, pkt);
        if (ret < 0)
            return ret;
        goto return_packet;
    }
 
    for (;;) {
        PacketList *pktl = s->internal->packet_buffer;
        if (pktl) {
            // next_pkt is the head of the buffer, i.e. the next packet to hand out
            AVPacket *next_pkt = &pktl->pkt;
            if (next_pkt->dts != AV_NOPTS_VALUE) {
                int wrap_bits = s->streams[next_pkt->stream_index]->pts_wrap_bits;
                // dts of the most recently matched packet of this stream
                int64_t last_dts = next_pkt->dts;
                // walk the buffered packets while the head packet still lacks a pts;
                // dts values are compared with av_compare_mod() using modulus 2^wrap_bits
                while (pktl && next_pkt->pts == AV_NOPTS_VALUE) {
                    if (pktl->pkt.stream_index == next_pkt->stream_index &&
                        av_compare_mod(next_pkt->dts, pktl->pkt.dts, 2ULL << (wrap_bits - 1)) < 0) {
                        if (av_compare_mod(pktl->pkt.pts, pktl->pkt.dts, 2ULL << (wrap_bits - 1))) {
                            // not a B-frame: its dts becomes the missing pts
                            next_pkt->pts = pktl->pkt.dts;
                        }
                        if (last_dts != AV_NOPTS_VALUE) {
                            last_dts = pktl->pkt.dts;
                        }
                    }
                    pktl = pktl->next;
                }
                // at eof with pts still unknown: estimate it as last_dts + duration
                if (eof && next_pkt->pts == AV_NOPTS_VALUE && last_dts != AV_NOPTS_VALUE) {
                    next_pkt->pts = last_dts + next_pkt->duration;
                }
                // rewind to the head of the list
                pktl = s->internal->packet_buffer;
            }
            // take the packet from the packet buffer unless it must still wait for a pts
            st = s->streams[next_pkt->stream_index];
            if (!(next_pkt->pts == AV_NOPTS_VALUE && st->discard < AVDISCARD_ALL &&
                  next_pkt->dts != AV_NOPTS_VALUE && !eof)) {
                ret = avpriv_packet_list_get(&s->internal->packet_buffer,
                                               &s->internal->packet_buffer_end, pkt);
                goto return_packet;
            }
        }
        // read one frame through the internal helper
        ret = read_frame_internal(s, pkt);
        if (ret < 0) {
            // packets are still buffered and the failure is not EAGAIN:
            // set eof and loop again so the buffered packets get handed out
            if (pktl && ret != AVERROR(EAGAIN)) {
                eof = 1;
                continue;
            } else
                return ret;
        }
        // append the freshly read packet to the packet buffer
        ret = avpriv_packet_list_put(&s->internal->packet_buffer,
                                 &s->internal->packet_buffer_end,
                                 pkt, NULL, 0);
        if (ret < 0) {
            av_packet_unref(pkt);
            return ret;
        }
    }
return_packet:
    ......
    return ret;
}

3、read_frame_internal

read_frame_internal()是内部读取一帧数据的方法,主要调用ff_read_packet()和parse_packet(),具体代码如下:

/*
 * Excerpt of read_frame_internal(): pull packets from ff_read_packet() until
 * one can be output directly or s->internal->parse_queue becomes non-empty,
 * running packets through parse_packet() when the stream has a parser.
 * The code after the loop is elided in this listing.
 */
static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
{
    int ret, i, got_packet = 0;
    AVDictionary *metadata = NULL;
 
    while (!got_packet && !s->internal->parse_queue) {
        AVStream *st;
        // read the next packet
        ret = ff_read_packet(s, pkt);
        if (ret < 0) {
            if (ret == AVERROR(EAGAIN))
                return ret;
            // flush the parsers (parse_packet() is called with flush = 1)
            for (i = 0; i < s->nb_streams; i++) {
                st = s->streams[i];
                if (st->parser && st->need_parsing)
                    parse_packet(s, pkt, st->index, 1);
            }
            break;
        }
        ret = 0;
        st  = s->streams[pkt->stream_index];
        st->event_flags |= AVSTREAM_EVENT_FLAG_NEW_PACKETS;
        // refresh the internal codec context from codecpar when an update is pending
        if (st->internal->need_context_update) {
            if (avcodec_is_open(st->internal->avctx)) {
                avcodec_close(st->internal->avctx);
                st->internal->info->found_decoder = 0;
            }
            // close the parser, because it depends on the codec
            if (st->parser && st->internal->avctx->codec_id != st->codecpar->codec_id) {
                av_parser_close(st->parser);
                st->parser = NULL;
            }
            ret = avcodec_parameters_to_context(st->internal->avctx, st->codecpar);
            if (ret < 0) {
                av_packet_unref(pkt);
                return ret;
            }
            st->internal->need_context_update = 0;
        }
        if (st->need_parsing && !st->parser && !(s->flags & AVFMT_FLAG_NOPARSE)) {
            // initialize the parser
            st->parser = av_parser_init(st->codecpar->codec_id);
            if (!st->parser) {
                // no parser could be created: disable parsing for this stream
                st->need_parsing = AVSTREAM_PARSE_NONE;
            } else if (st->need_parsing == AVSTREAM_PARSE_HEADERS)
                st->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
            else if (st->need_parsing == AVSTREAM_PARSE_FULL_ONCE)
                st->parser->flags |= PARSER_FLAG_ONCE;
            else if (st->need_parsing == AVSTREAM_PARSE_FULL_RAW)
                st->parser->flags |= PARSER_FLAG_USE_CODEC_TS;
        }
        if (!st->need_parsing || !st->parser) {
            // no parsing needed: output the packet as is
            compute_pkt_fields(s, st, NULL, pkt, AV_NOPTS_VALUE, AV_NOPTS_VALUE);
            // formats flagged AVFMT_GENERIC_INDEX get an index entry per key frame with a known dts
            if ((s->iformat->flags & AVFMT_GENERIC_INDEX) &&
                (pkt->flags & AV_PKT_FLAG_KEY) && pkt->dts != AV_NOPTS_VALUE) {
                ff_reduce_index(s, st->index);
                av_add_index_entry(st, pkt->pos, pkt->dts,
                                   0, 0, AVINDEX_KEYFRAME);
            }
            got_packet = 1;
        } else if (st->discard < AVDISCARD_ALL) {
            // parse the packet
            if ((ret = parse_packet(s, pkt, pkt->stream_index, 0)) < 0)
                return ret;
            // copy these fields from the internal codec context back into codecpar
            st->codecpar->sample_rate = st->internal->avctx->sample_rate;
            st->codecpar->bit_rate = st->internal->avctx->bit_rate;
            st->codecpar->channels = st->internal->avctx->channels;
            st->codecpar->channel_layout = st->internal->avctx->channel_layout;
            st->codecpar->codec_id = st->internal->avctx->codec_id;
        } else {
            // stream has discard >= AVDISCARD_ALL: drop the packet
            av_packet_unref(pkt);
        }
        // a key frame clears skip_to_keyframe; while it is still set the
        // packet is dropped and the loop keeps reading
        if (pkt->flags & AV_PKT_FLAG_KEY)
            st->internal->skip_to_keyframe = 0;
        if (st->internal->skip_to_keyframe) {
            av_packet_unref(pkt);
            got_packet = 0;
        }
    }
    ......
 
    return ret;
}

4、ff_read_packet

ff_read_packet()主要是调用AVInputFormat指向的read_packet()方法来读取数据包:

/*
 * Excerpt of ff_read_packet(): obtain one raw packet, either from
 * s->internal->raw_packet_buffer or from the demuxer's read_packet callback
 * (s->iformat->read_packet). Local declarations are elided in this listing.
 */
int ff_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    ......
    for (;;) {
        PacketList *pktl = s->internal->raw_packet_buffer;
        const AVPacket *pkt1;
        if (pktl) {
            st = s->streams[pktl->pkt.stream_index];
            // remaining buffer budget is used up: call probe_codec() with a NULL packet
            if (s->internal->raw_packet_buffer_remaining_size <= 0)
                if ((err = probe_codec(s, st, NULL)) < 0)
                    return err;
            // no probe pending for this stream: hand out the buffered head
            // packet and add its size back to the remaining budget
            if (st->internal->request_probe <= 0) {
                avpriv_packet_list_get(&s->internal->raw_packet_buffer,
                                   &s->internal->raw_packet_buffer_end, pkt);
                s->internal->raw_packet_buffer_remaining_size += pkt->size;
                return 0;
            }
        }
        // read a packet from the demuxer
        ret = s->iformat->read_packet(s, pkt);
        if (ret < 0) {
            av_packet_unref(pkt);
            // FFERROR_REDO: call the demuxer again
            if (ret == FFERROR_REDO)
                continue;
            // nothing buffered, or EAGAIN: report the error to the caller
            if (!pktl || ret == AVERROR(EAGAIN))
                return ret;
            // otherwise run probe_codec() on every stream that is still
            // probing, then loop again
            for (i = 0; i < s->nb_streams; i++) {
                st = s->streams[i];
                if (st->probe_packets || st->internal->request_probe > 0)
                    if ((err = probe_codec(s, st, NULL)) < 0)
                        return err;
            }
            continue;
        }
        err = av_packet_make_refcounted(pkt);
        if (err < 0) {
            av_packet_unref(pkt);
            return err;
        }
        // a corrupt packet is dropped only when AVFMT_FLAG_DISCARD_CORRUPT is set
        if (pkt->flags & AV_PKT_FLAG_CORRUPT) {
            if (s->flags & AVFMT_FLAG_DISCARD_CORRUPT) {
                av_log(s, AV_LOG_WARNING, ", dropping it.\n");
                av_packet_unref(pkt);
                continue;
            }
        }
        st = s->streams[pkt->stream_index];
        if (update_wrap_reference(s, st, pkt->stream_index, pkt) 
			&& st->internal->pts_wrap_behavior == AV_PTS_WRAP_SUB_OFFSET) {
            // correct the first timestamps
            if (!is_relative(st->first_dts))
                st->first_dts = wrap_timestamp(st, st->first_dts);
            if (!is_relative(st->start_time))
                st->start_time = wrap_timestamp(st, st->start_time);
            if (!is_relative(st->cur_dts))
                st->cur_dts = wrap_timestamp(st, st->cur_dts);
        }
        pkt->dts = wrap_timestamp(st, pkt->dts);
        pkt->pts = wrap_timestamp(st, pkt->pts);
        force_codec_ids(s, st);
        /* TODO: audio: time filter; video: frame reordering (pts != dts) */
        // optionally overwrite dts/pts with av_gettime() rescaled to the stream time base
        if (s->use_wallclock_as_timestamps)
            pkt->dts = pkt->pts = av_rescale_q(av_gettime(), AV_TIME_BASE_Q, st->time_base);
        // nothing buffered and no probe requested: return the packet directly
        if (!pktl && st->internal->request_probe <= 0)
            return ret;
        // otherwise queue the packet, charge its size against the remaining
        // budget and pass the queued copy to probe_codec()
        err = avpriv_packet_list_put(&s->internal->raw_packet_buffer,
                                 &s->internal->raw_packet_buffer_end,
                                 pkt, NULL, 0);
        if (err < 0) {
            av_packet_unref(pkt);
            return err;
        }
        pkt1 = &s->internal->raw_packet_buffer_end->pkt;
        s->internal->raw_packet_buffer_remaining_size -= pkt1->size;
        if ((err = probe_codec(s, st, pkt1)) < 0)
            return err;
    }
}

其中,read_packet()为函数指针。以mp4的解封装过程为例,位于libavformat/mov.c,对应的AVInputFormat如下:

/*
 * Demuxer descriptor for the QuickTime / MOV family of formats; the short
 * names and file extensions it is registered under are listed in .name and
 * .extensions. The read_* members are the callbacks for this demuxer;
 * .read_packet is set to mov_read_packet.
 */
AVInputFormat ff_mov_demuxer = {
    .name           = "mov,mp4,m4a,3gp,3g2,mj2",
    .long_name      = NULL_IF_CONFIG_SMALL("QuickTime / MOV"),
    .priv_class     = &mov_class,
    .priv_data_size = sizeof(MOVContext),
    .extensions     = "mov,mp4,m4a,3gp,3g2,mj2,psp,m4b,ism,ismv,isma,f4v",
    .read_probe     = mov_probe,
    .read_header    = mov_read_header,
    .read_packet    = mov_read_packet,
    .read_close     = mov_read_close,
    .read_seek      = mov_read_seek,
    .flags          = AVFMT_NO_BYTE_SEEK | AVFMT_SEEK_TO_PTS,
};

那么iformat->read_packet()所指向的函数为mov_read_packet(),具体代码如下:

/*
 * Excerpt of mov_read_packet(): locate the next sample, read its payload
 * into pkt and fill in pts / duration. Several parts (including the
 * assignment of sc) are elided in this listing.
 */
static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    MOVContext *mov = s->priv_data;
    MOVStreamContext *sc;
    AVIndexEntry *sample;
    AVStream *st = NULL;
    int64_t current_index;
    int ret;
    mov->fc = s;
 retry:
    // find the next sample
    sample = mov_find_next_sample(s, &st);
    ......
    if (st->discard != AVDISCARD_ALL) {
        // read the packet payload; EIA-608 samples larger than 8 bytes go
        // through get_eia608_packet(), everything else through av_get_packet()
        if (st->codecpar->codec_id == AV_CODEC_ID_EIA_608 && sample->size > 8)
            ret = get_eia608_packet(sc->pb, pkt, sample->size);
        else
            ret = av_get_packet(sc->pb, pkt, sample->size);
        if (ret < 0) {
            // step the current sample back when should_retry() asks for it
            // (presumably so the same sample is read again on the next call)
            if (should_retry(sc->pb, ret)) {
                mov_current_sample_dec(sc);
            }
            return ret;
        }
    }
    ......
    if (sc->ctts_data && sc->ctts_index < sc->ctts_count) {
        // ctts entry available: pts = dts + dts_shift + duration of the current ctts entry
        pkt->pts = pkt->dts + sc->dts_shift + sc->ctts_data[sc->ctts_index].duration;
        // update the ctts context: advance to the next entry once the
        // current entry's sample count has been consumed
        sc->ctts_sample++;
        if (sc->ctts_index < sc->ctts_count &&
            sc->ctts_data[sc->ctts_index].count == sc->ctts_sample) {
            sc->ctts_index++;
            sc->ctts_sample = 0;
        }
    } else {
        // no ctts entry: duration is the distance to the next index entry's
        // timestamp (or to st->duration past the last entry), and pts = dts
        int64_t next_dts = (sc->current_sample < st->nb_index_entries) ?
            st->index_entries[sc->current_sample].timestamp : st->duration;
        if (next_dts >= pkt->dts)
            pkt->duration = next_dts - pkt->dts;
        pkt->pts = pkt->dts;
    }
    ......
    return 0;
}

5、parse_packet

parse_packet()主要调用av_parser_parse2()来实现解析,代码如下:

/*
 * Excerpt of parse_packet(): feed the data of pkt to the stream's parser via
 * av_parser_parse2() and set duration and key-frame flag on each output
 * packet. With flush != 0 the parser is closed afterwards. Local
 * declarations and several parts of the loop are elided in this listing.
 */
static int parse_packet(AVFormatContext *s, AVPacket *pkt,
                        int stream_index, int flush)
{
    ......
    while (size > 0 || (flush && got_output)) {
        int len;
        int64_t next_pts = pkt->pts;
        int64_t next_dts = pkt->dts;
        // actually parse the packet data
        len = av_parser_parse2(st->parser, st->internal->avctx,
                               &out_pkt->data, &out_pkt->size, data, size,
                               pkt->pts, pkt->dts, pkt->pos);
        ......
        // set the duration: inherit it from the input packet only when the
        // parser has PARSER_FLAG_COMPLETE_FRAMES set, otherwise start from 0
        out_pkt->duration = (st->parser->flags & PARSER_FLAG_COMPLETE_FRAMES) ? pkt->duration : 0;
        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            // audio with a positive sample rate: rescale the parser's duration
            // from 1/sample_rate units to the stream time base, rounding down
            if (st->internal->avctx->sample_rate > 0) {
                out_pkt->duration =
                    av_rescale_q_rnd(st->parser->duration,
                                     (AVRational) { 1, st->internal->avctx->sample_rate },
                                     st->time_base,
                                     AV_ROUND_DOWN);
            }
        }
        ......
        // flag the output packet as a key frame
        if (st->parser->key_frame == 1 ||
            (st->parser->key_frame == -1 &&
             st->parser->pict_type == AV_PICTURE_TYPE_I))
            out_pkt->flags |= AV_PKT_FLAG_KEY;
        // parser reports neither key-frame state nor picture type: fall back
        // to the key flag of the input packet
        if (st->parser->key_frame == -1 && st->parser->pict_type ==AV_PICTURE_TYPE_NONE && (pkt->flags&AV_PKT_FLAG_KEY))
            out_pkt->flags |= AV_PKT_FLAG_KEY;
        ......
    }
    // when flushing, close the parser and clear the stream's pointer to it
    if (flush) {
        av_parser_close(st->parser);
        st->parser = NULL;
    }
fail:
    // also reached by falling through from above: the input packet is unreferenced before returning
    av_packet_unref(pkt);
    return ret;
}

最终是调用libavcodec/parser.c的av_parser_parse2()方法去解析数据包。