当前位置：首页 > news >正文

一些有关ffmpeg 使用（1）

news 来源：原创 2025/4/26 4:55:36

1 解封装流程

1.1 什么是解封装

封装的逆向操作：封装是把音频流、视频流、字幕流等不同成分按一定规则组合成视频文件（如 MP4、FLV ），复用器负责此过程。解封装则相反，是用解复用器（针对 MP4、FLV 等格式有对应解复用器）将视频文件再按规则拆分回音频流、视频流、字幕流等各个成分。

流索引标识：解封装后，为区分各流，会给音频流、视频流等分配索引，如图中 audio_index = 1 表示音频流索引为 1 ，video_index = 0 表示视频流索引为 0 。程序后续可依这些索引分别处理对应流，比如解码视频流、播放音频流等。

AVMEDIA_TYPE_VIDEO视频流
/* Pick the "best" video stream; -1, -1 = no preferred stream number and no related stream. */
video_index = av_find_best_stream(ic, AVMEDIA_TYPE_VIDEO,
                                  -1, -1, NULL, 0);

AVMEDIA_TYPE_AUDIO音频流
/* Pick the "best" audio stream; -1, -1 = no preferred stream number and no related stream. */
audio_index = av_find_best_stream(ic, AVMEDIA_TYPE_AUDIO,
                                  -1, -1, NULL, 0);

1.2 实验

第一部分：打开流，相当于 ffmpeg -i in_filename

const char *default_filename = "believe.mp4";char *in_filename = NULL;if(argv[1] == NULL){in_filename = default_filename;}else{in_filename = argv[1];}AVFormatContext *ifmt_ctx = NULL//初始化上下文int ret = avformat_open_input(&ifmt_ctx, in_filename, NULL, NULL);//头部信息if (ret < 0)  //如果打开媒体文件失败，打印失败原因{char buf[1024] = { 0 };av_strerror(ret, buf, sizeof(buf) - 1);printf("open %s failed:%s\n", in_filename, buf);goto failed;}ret = avformat_find_stream_info(ifmt_ctx, NULL);//   流的信息if (ret < 0)  //如果打开媒体文件失败，打印失败原因{char buf[1024] = { 0 };av_strerror(ret, buf, sizeof(buf) - 1);printf("avformat_find_stream_info %s failed:%s\n", in_filename, buf);goto failed;}
av_dump_format(ifmt_ctx, 0, in_filename, 0);printf_s("\n==== av_dump_format in_filename:%s ===\n", in_filename);av_dump_format(ifmt_ctx, 0, in_filename, 0);printf_s("\n==== av_dump_format finish =======\n\n");// url: 调用avformat_open_input读取到的媒体文件的路径/名字printf("media name:%s\n", ifmt_ctx->url);// nb_streams: nb_streams媒体流数量printf("stream number:%d\n", ifmt_ctx->nb_streams);// bit_rate: 媒体文件的码率,单位为bpsprintf("media average ratio:%lldkbps\n",(int64_t)(ifmt_ctx->bit_rate/1024));// 时间printf("total duration: %02d:%02d:%02d\n", hour, minute, second);printf("\n");

第二部分：分析流的信息，同样对应 ffmpeg -i in_filename 的输出

/* Part 2a: locate the audio stream and print its parameters. */
ret = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
if (ret >= 0) {
    audioindex = ret;
    AVStream *in_stream = ifmt_ctx->streams[audioindex];
    printf("----- Audio info:\n");

    /* index: identifier of this stream within the demuxed file. */
    printf("index:%d\n", in_stream->index);

    /* sample_rate: audio sample rate in Hz. */
    printf("samplerate:%dHz\n", in_stream->codecpar->sample_rate);

    /* codecpar->format: audio sample format (only two formats are named here). */
    if (AV_SAMPLE_FMT_FLTP == in_stream->codecpar->format) {
        printf("sampleformat:AV_SAMPLE_FMT_FLTP\n");
    } else if (AV_SAMPLE_FMT_S16P == in_stream->codecpar->format) {
        printf("sampleformat:AV_SAMPLE_FMT_S16P\n");
    }

    /* channels: number of audio channels. */
    printf("channel number:%d\n", in_stream->codecpar->channels);

    /* codec_id: audio compression format. */
    if (AV_CODEC_ID_AAC == in_stream->codecpar->codec_id) {
        printf("audio codec:AAC\n");
    } else if (AV_CODEC_ID_MP3 == in_stream->codecpar->codec_id) {
        printf("audio codec:MP3\n");
    } else {
        printf("audio codec_id:%d\n", in_stream->codecpar->codec_id);
    }

    /*
     * Audio duration in whole seconds. At millisecond or microsecond
     * precision the audio and video durations are not necessarily equal.
     */
    if (in_stream->duration != AV_NOPTS_VALUE) {
        /* duration is in time_base units; truncate to whole seconds. */
        int duration_audio = (int)(in_stream->duration * av_q2d(in_stream->time_base));
        printf("audio duration: %02d:%02d:%02d\n",
               duration_audio / 3600,
               (duration_audio % 3600) / 60,
               duration_audio % 60);
    } else {
        printf("audio duration unknown\n");
    }
}

----- Audio info:
index:1
samplerate:48000Hz
sampleformat:AV_SAMPLE_FMT_FLTP
channel number:2
audio codec:AAC
audio duration: 00:03:42

以上各行分别对应：音频流索引、采样率、采样格式、声道数、编码器、时长。

/* Part 2b: locate the video stream and print its parameters. */
ret = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (ret >= 0) {
    videoindex = ret;
    AVStream *in_stream = ifmt_ctx->streams[videoindex];
    printf("----- Video info:\n");
    printf("index:%d\n", in_stream->index);

    /* avg_frame_rate: average frame rate in frames per second. */
    printf("fps:%lffps\n", av_q2d(in_stream->avg_frame_rate));

    /* codec_id: video compression format. */
    if (AV_CODEC_ID_MPEG4 == in_stream->codecpar->codec_id) {
        printf("video codec:MPEG4\n");
    } else if (AV_CODEC_ID_H264 == in_stream->codecpar->codec_id) {
        printf("video codec:H264\n");
    } else {
        printf("video codec_id:%d\n", in_stream->codecpar->codec_id);
    }

    /* Frame width and height in pixels. */
    printf("width:%d height:%d\n",
           in_stream->codecpar->width, in_stream->codecpar->height);

    /*
     * Video duration in whole seconds. At millisecond or microsecond
     * precision the audio and video durations are not necessarily equal.
     */
    if (in_stream->duration != AV_NOPTS_VALUE) {
        /* duration is in time_base units; truncate to whole seconds. */
        int duration_video = (int)(in_stream->duration * av_q2d(in_stream->time_base));
        printf("video duration: %02d:%02d:%02d\n",
               duration_video / 3600,
               (duration_video % 3600) / 60,
               duration_video % 60);
    } else {
        printf("video duration unknown");
    }
    printf("\n");
}

----- Video info:
index:0
fps:14.464607fps
video codec:H264
width:1920 height:1080
video duration: 00:03:42

以上各行分别对应：视频流索引、帧率、编码器、分辨率、时长。

第三部分：提取音频流（不重新编码），相当于 ffmpeg -i input.mp4 -vn -c:a copy output.aac

 printf("audio profile:%d, FF_PROFILE_AAC_LOW:%d\n",ifmt_ctx->streams[audio_index]->codecpar->profile,FF_PROFILE_AAC_LOW);if(ifmt_ctx->streams[audio_index]->codecpar->codec_id != AV_CODEC_ID_AAC){printf("the media file no contain AAC stream, it's codec_id is %d\n",ifmt_ctx->streams[audio_index]->codecpar->codec_id);goto failed;}// 读取媒体文件，并把aac数据帧写入到本地文件while(av_read_frame(ifmt_ctx, &pkt) >=0 ){if(pkt.stream_index == audio_index){char adts_header_buf[7] = {0};adts_header(adts_header_buf, pkt.size,ifmt_ctx->streams[audio_index]->codecpar->profile,ifmt_ctx->streams[audio_index]->codecpar->sample_rate,ifmt_ctx->streams[audio_index]->codecpar->channels);fwrite(adts_header_buf, 1, 7, aac_fd);  // 写adts header , ts流不适用，ts流分离出来的packet带了adts headerlen = fwrite( pkt.data, 1, pkt.size, aac_fd);   // 写adts dataif(len != pkt.size){av_log(NULL, AV_LOG_DEBUG, "warning, length of writed data isn't equal pkt.size(%d, %d)\n",len,pkt.size);}}av_packet_unref(&pkt);}

即对每个 AAC 数据包，先写入 7 字节的 ADTS 头，再写入数据包本身。

第四部分：提取视频流（不重新编码），相当于 ffmpeg -i input.mp4 -an -c:v copy -bsf:v h264_mp4toannexb output.h264

    pkt = av_packet_alloc();av_init_packet(pkt);const AVBitStreamFilter *bsfilter = av_bsf_get_by_name("h264_mp4toannexb");AVBSFContext *bsf_ctx = NULL;// 2 初始化过滤器上下文av_bsf_alloc(bsfilter, &bsf_ctx); //AVBSFContext;// 3 添加解码器属性avcodec_parameters_copy(bsf_ctx->par_in, ifmt_ctx->streams[videoindex]->codecpar);av_bsf_init(bsf_ctx);file_end = 0;while (0 == file_end){if((ret = av_read_frame(ifmt_ctx, pkt)) < 0){// 没有更多包可读file_end = 1;printf("read file end: ret:%d\n", ret);}if(ret == 0 && pkt->stream_index == videoindex){
#if 1int input_size = pkt->size;int out_pkt_count = 0;if (av_bsf_send_packet(bsf_ctx, pkt) != 0) // bitstreamfilter内部去维护内存空间{av_packet_unref(pkt);   // 你不用了就把资源释放掉continue;       // 继续送}av_packet_unref(pkt);   // 释放资源while(av_bsf_receive_packet(bsf_ctx, pkt) == 0){out_pkt_count++;// printf("fwrite size:%d\n", pkt->size);size_t size = fwrite(pkt->data, 1, pkt->size, outfp);if(size != pkt->size){printf("fwrite failed-> write:%u, pkt_size:%u\n", size, pkt->size);}av_packet_unref(pkt);}if(out_pkt_count >= 2){printf("cur pkt(size:%d) only get 1 out pkt, it get %d pkts\n",input_size, out_pkt_count);}#else       // TS流可以直接写入size_t size = fwrite(pkt->data, 1, pkt->size, outfp);if(size != pkt->size){printf("fwrite failed-> write:%u, pkt_size:%u\n", size, pkt->size);}av_packet_unref(pkt);
#endif

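这里使用了比特流过滤器（AVBitStreamFilter）h264_mp4toannexb：MP4 中的 H.264 数据采用 AVCC 格式（每个 NALU 前是长度字段），而裸 .h264 文件需要 Annex B 格式（每个 NALU 前是起始码，并带有 SPS/PPS），因此写文件前要先经过该过滤器转换。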

2 解码

上面我们获取了两个流分别为aac的音频流和h264的视频流，现在我们要对这两个流解码

2.1 函数

const AVCodec *codec;
AVCodecContext *codec_ctx= NULL;AVCodecParserContext *parser = NULL// 查找解码器codec = avcodec_find_decoder(audio_codec_id);  // AV_CODEC_ID_AACif (!codec) {fprintf(stderr, "Codec not found\n");exit(1);}// 获取裸流的解析器 AVCodecParserContext(数据)  +  AVCodecParser(方法)parser = av_parser_init(codec->id);if (!parser) {fprintf(stderr, "Parser not found\n");exit(1);}// 分配codec上下文codec_ctx = avcodec_alloc_context3(codec);if (!codec_ctx) {fprintf(stderr, "Could not allocate audio codec context\n");exit(1);}

codec：编解码器
AVCodecContext ：编解码器上下文

基本概念
编解码器上下文（Codec Context）本质上是一个数据结构，它用来存储编解码器在工作过程中所需要的各种参数、状态信息以及中间数据。简单来说，它就是编解码器的 “工作环境”，编解码器依据上下文中的这些信息来完成编码或者解码的操作。
作用
参数配置：编解码器上下文可以对编解码器的各种参数进行配置。以视频编解码器为例，这些参数可能包括视频的分辨率、帧率、比特率、像素格式等；对于音频编解码器，可能涉及采样率、声道数、采样格式等。通过对这些参数的合理配置，能够让编解码器根据具体需求进行工作。
状态管理：它会记录编解码器在运行过程中的状态信息。例如，在解码过程中，可能会记录当前解码到的帧号、是否遇到错误等信息；在编码过程中，可能会记录编码进度、已经编码的帧数等。
数据传递：编解码器上下文还承担着在不同模块之间传递数据的任务。比如，在解码时，输入的编码数据会通过上下文传递给解码器；解码完成后，解码后的原始数据也会通过上下文传递给后续的处理模块。

AVCodecParserContext：用于解析输入的数据流并把它分成一帧一帧的压缩编码数据。比较形象的说法就是把长长的一段连续的数据“切割”成一段段的数据。
解码
- avcodec_send_packet()
  函数：int avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt);
  作用：将裸流数据包送给解码器。
- avcodec_receive_frame()
  函数：int avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame);
  作用：从解码器返回已解码的输出数据。
  返回值：AVERROR(EAGAIN)：该状态下没有帧输出，需要使用 avcodec_send_packet 发送新的 packet 到解码器；
  AVERROR_EOF：解码器已经被完全刷新，不再有输出帧。

/**
 * Send one packet to the audio decoder and write every frame it yields
 * to outfile as interleaved PCM samples.
 *
 * @param dec_ctx  decoder context
 * @param pkt      compressed packet to decode
 * @param frame    scratch frame that receives each decoded frame
 * @param outfile  destination for the raw PCM data
 *
 * Returns early when the packet cannot be submitted; exits the process
 * on a decoding error.
 */
static void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame,
                   FILE *outfile)
{
    static int s_print_format = 0;  /* print the sample format only once */
    int i, ch;
    int ret, data_size;

    /* Send the packet with the compressed data to the decoder. */
    ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret == AVERROR(EAGAIN)) {
        fprintf(stderr, "Receive_frame and send_packet both returned EAGAIN, which is an API violation.\n");
    } else if (ret < 0) {
        fprintf(stderr, "Error submitting the packet to the decoder, err:%s, pkt_size:%d\n",
                av_get_err(ret), pkt->size);
        return;
    }

    /* Read all the output frames (in general there may be any number of them). */
    while (ret >= 0) {
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            return;
        } else if (ret < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }

        data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);
        if (data_size < 0) {
            /* This should not occur, checking just for paranoia. */
            fprintf(stderr, "Failed to calculate data size\n");
            exit(1);
        }

        if (s_print_format == 0) {
            s_print_format = 1;
            print_sample_format(frame);
        }

        /*
         * "P" means planar: samples are laid out as
         *   LLLLLLRRRRRR LLLLLLRRRRRR ...  (one LLLLLLRRRRRR group per frame).
         * Without "P" the layout is interleaved (packed):
         *   LRLRLRLRLR ...                 (one LR pair per sample).
         * Playback example: ffplay -ar 48000 -ac 2 -f f32le believe.pcm
         */
        if (av_sample_fmt_is_planar(dec_ctx->sample_fmt)) {
            /* One plane per channel: interleave the channels while writing. */
            for (i = 0; i < frame->nb_samples; i++) {
                for (ch = 0; ch < dec_ctx->channels; ch++) {
                    fwrite(frame->data[ch] + data_size * i, 1, data_size, outfile);
                }
            }
        } else {
            /* Packed data lives entirely in data[0] and is already interleaved. */
            fwrite(frame->data[0], 1,
                   (size_t)data_size * frame->nb_samples * dec_ctx->channels,
                   outfile);
        }
    }
}

上面的函数把一个数据包送入解码器，并把解码出的每一帧音频的采样点按交错方式写入输出文件。

/**
 * Send one packet to the video decoder and write every frame it yields
 * to outfile as raw planar YUV 4:2:0 (Y plane, then U, then V).
 *
 * @param dec_ctx  decoder context
 * @param pkt      compressed packet to decode
 * @param frame    scratch frame that receives each decoded picture
 * @param outfile  destination for the raw YUV data
 *
 * Returns early when the packet cannot be submitted; exits the process
 * on a decoding error.
 */
static void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame,
                   FILE *outfile)
{
    static int s_print_format = 0;  /* print the picture format only once */
    int ret;

    /* Send the packet with the compressed data to the decoder. */
    ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret == AVERROR(EAGAIN)) {
        fprintf(stderr, "Receive_frame and send_packet both returned EAGAIN, which is an API violation.\n");
    } else if (ret < 0) {
        fprintf(stderr, "Error submitting the packet to the decoder, err:%s, pkt_size:%d\n",
                av_get_err(ret), pkt->size);
        return;
    }

    /* Read all the output frames (in general there may be any number of them). */
    while (ret >= 0) {
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            return;
        } else if (ret < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }

        if (s_print_format == 0) {
            s_print_format = 1;
            print_video_format(frame);
        }

        /*
         * H.264 usually decodes to AV_PIX_FMT_YUV420P; this writer assumes it.
         * linesize[] is the number of bytes per row in memory and may be
         * larger than the visible width, so each row is written separately
         * and the row offset is a multiple of linesize[], not of width.
         * Chroma planes are half size, rounded up for odd dimensions.
         */
        const int chroma_width  = (frame->width + 1) / 2;
        const int chroma_height = (frame->height + 1) / 2;

        for (int j = 0; j < frame->height; j++) {
            fwrite(frame->data[0] + j * frame->linesize[0], 1, frame->width, outfile);
        }
        for (int j = 0; j < chroma_height; j++) {
            fwrite(frame->data[1] + j * frame->linesize[1], 1, chroma_width, outfile);
        }
        for (int j = 0; j < chroma_height; j++) {
            fwrite(frame->data[2] + j * frame->linesize[2], 1, chroma_width, outfile);
        }

        /*
         * Wrong approach (visible with source.200kbps.766x322_10s.h264):
         * writing each plane in a single call ignores the linesize padding.
         *   fwrite(frame->data[0], 1, frame->width * frame->height, outfile);       // Y
         *   fwrite(frame->data[1], 1, frame->width * frame->height / 4, outfile);   // U
         *   fwrite(frame->data[2], 1, frame->width * frame->height / 4, outfile);   // V
         */
    }
}

上面的函数把一个数据包送入解码器，并把解码出的每一帧图像按 Y、U、V 三个分量依次写入输出文件。

3 自定义io

/*
 * Allocate an AVIOContext that performs buffered I/O through user callbacks.
 *
 * buffer       pre-allocated I/O buffer
 * buffer_size  size of buffer in bytes
 * write_flag   0 for read-only, non-zero for writable
 * opaque       user data handed to every callback
 * read_packet  callback that refills the buffer
 * write_packet callback that writes the buffer out
 * seek         callback that moves the read/write position
 */
AVIOContext *avio_alloc_context(
    unsigned char *buffer,
    int buffer_size,
    int write_flag,
    void *opaque,
    int (*read_packet)(void *opaque, uint8_t *buf, int buf_size),
    int (*write_packet)(void *opaque, uint8_t *buf, int buf_size),
    int64_t (*seek)(void *opaque, int64_t offset, int whence)
);

unsigned char *buffer
指向预先分配好的缓冲区的指针，该缓冲区用于存储从数据源读取的数据（读取模式）或要写入数据源的数据（写入模式）。在调用此函数之前，你需要使用 av_malloc 等内存分配函数为其分配足够的内存空间。
int buffer_size
缓冲区的大小，以字节为单位。该值指定了 buffer 所指向的内存区域的大小。
int write_flag
一个布尔值，用于指示 AVIOContext 的操作模式。
- 0 表示只读模式，即 AVIOContext 仅用于从数据源读取数据。
- 非零值（通常为 1）表示可写模式，即 AVIOContext 用于向数据源写入数据。
void *opaque
一个通用指针，可传递任意用户数据，通常是与数据源相关的上下文信息，如文件指针、网络套接字等。这个指针会被传递给后续的 read_packet、write_packet 和 seek 回调函数，以便在这些函数中使用。
int (*read_packet)(void *opaque, uint8_t *buf, int buf_size)
一个指向读取回调函数的指针，用于从数据源读取数据。当 AVIOContext 需要从数据源读取数据时，会调用这个函数。
函数参数：
void *opaque：即前面传入的 opaque 指针，可用于获取数据源的上下文信息。
uint8_t *buf：指向用于存储读取数据的缓冲区。
int buf_size：缓冲区的大小，即最多可读取的字节数。
函数返回值：成功读取的字节数；数据读完时应返回 AVERROR_EOF，其他小于 0 的返回值表示发生了错误。
int (*write_packet)(void *opaque, uint8_t *buf, int buf_size)
一个指向写入回调函数的指针，用于向数据源写入数据。当 AVIOContext 需要向数据源写入数据时，会调用这个函数。
函数参数与 read_packet 类似，只是 buf 中的数据是要写入数据源的数据。
函数返回值：成功写入的字节数，如果返回值小于 0，则表示发生了错误。
int64_t (*seek)(void *opaque, int64_t offset, int whence)
一个指向定位回调函数的指针，用于在数据源中移动读写位置。当 AVIOContext 需要在数据源中定位到特定位置时，会调用这个函数。
函数参数：
void *opaque：同样是前面传入的 opaque 指针。
int64_t offset：相对于 whence 指定位置的偏移量。
int whence：定位的起始位置，取值与标准 C 库中的 fseek 函数的 whence 参数类似，常见取值有 SEEK_SET（文件开头）、SEEK_CUR（当前位置）、SEEK_END（文件末尾）。
函数返回值：新的读写位置，如果返回值小于 0，则表示发生了错误。

 /* Buffer used by the AVIOContext for the data it reads. */
 uint8_t *io_buffer = av_malloc(BUF_SIZE);

 /* Read-only context (write_flag = 0): in_file is passed to read_packet as opaque. */
 AVIOContext *avio_ctx = avio_alloc_context(io_buffer, BUF_SIZE, 0, (void *)in_file,
                                            read_packet, NULL, NULL);

 /* Attach the custom I/O context to the format context. */
 /* NOTE(review): none of the three allocations is checked for NULL - confirm the caller handles that. */
 AVFormatContext *format_ctx = avformat_alloc_context();
 format_ctx->pb = avio_ctx;

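完成上述设置并用 avformat_open_input(&format_ctx, NULL, NULL, NULL) 打开输入之后，再调用下面的 av_read_frame 时，数据就会通过自定义的 read_packet 回调读取：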

ret = av_read_frame(format_ctx, packet); // reads through the custom I/O context

Z-Wave正通过自我革新，重塑在智能家居领域新定位

[FPGA基础] DMA

0基础 | Proteus仿真 | 51单片机 | 继电器

MySQL的MVCC【学习笔记】

01.oracle SQL基础

Django之旅：第七节--模版继承

IDEA配置将Servlet真正布署到Tomcat

Matplotlib高阶技术全景解析（续）：动态交互、三维可视化与性能优化

【初识Trae】字节跳动推出的下一代AI原生IDE，重新定义智能编程

微服务架构在云原生后端的深度融合与实践路径

图论---Kruskal（稀疏图）

PDFMathTranslate：基于LLM的PDF文档翻译及双语对照的工具【使用教程】

Spine 动画教程：皮肤制作

深度学习笔记22-RNN心脏病预测(Tensorflow)

Azure Data Factory ETL设计与调度最佳实践

【RedisLockRegistry】分布式锁

抖音小程序开发常见问题与代码解决方案

【N8N】Docker Desktop + WSL 安装过程（Docker Desktop - WSL update Failed解决方法）

从StandardMaterial和PBRMaterial到PBRMetallicRoughnessMaterial：Babylon.js材质转换完全指南

附赠二张图，阐述我对大模型的生态发展、技术架构认识。

金正恩出席朝鲜人民军海军驱逐舰入水仪式

摩根士丹利基金雷志勇：AI带来的产业演进仍在继续，看好三大景气领域

去年立案侦办侵权假冒案件3.7万起，公安部公布13起案例

印控克什米尔26名游客遭恐袭丧生后，印度对巴宣布多项反制措施

养胃不是顿顿喝粥，这份“胃的使用说明书”请收好

对话地铁读书人｜中学教师董女士：借来的书更好看