FFmpeg API を使用して映像ファイルの読み書きをする

Posted at 2021-09-13

Introduction

Until now I had been using OpenCV's VideoCapture and VideoWriter whenever I handled video data, but I figured the FFmpeg API would give me more flexibility, so this time I tried writing the code with it.
(A Google search turns up code that uses avcodec_decode_video2(), avpicture_fill(), and so on, but these functions are now deprecated, so I wrote something that follows the current API.)
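Concretely, the one-shot avcodec_decode_video2() call has been replaced by a send/receive pair, so the basic decoding shape now looks like the skeleton below (setup and error handling omitted; dec_ctx, pkt, and frm are placeholder names):

// Modern decode loop: feed one packet, then drain every frame that is ready
avcodec_send_packet(dec_ctx, pkt);  // replaces avcodec_decode_video2()
while (avcodec_receive_frame(dec_ctx, frm) >= 0){
    // ...process frm here...
    av_frame_unref(frm);
}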

Code

So, here is some experimental code that reads a video file (an mp4 in this case) and writes out a negative (inverted) version of it. For processing the decoded raw data itself, it may be better to use OpenCV, as sketched right below.
(Added 2021/09/14: changed the timestamp-related parts)
(Added 2021/09/17: fixed the memory-deallocation parts)
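For example, since the bgrframe in the code below holds packed BGR24 data, it can be wrapped in a cv::Mat without any copying. This is only a sketch assuming OpenCV is linked in; cv::Mat and cv::bitwise_not are standard OpenCV names, and everything else comes from the code below:

#include <opencv2/opencv.hpp>

// Wrap the decoded BGR frame in a cv::Mat (no copy; linesize[0] is the row stride)
cv::Mat img(bgrframe->height, bgrframe->width, CV_8UC3,
            bgrframe->data[0], bgrframe->linesize[0]);
cv::bitwise_not(img, img);  // the same negative-positive inversion, done in place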

#include <iostream>
extern "C"{
    #include <libavformat/avformat.h>
    #include <libavcodec/avcodec.h>
    #include <libavutil/imgutils.h>
    #include <libavutil/opt.h>
    #include <libswscale/swscale.h>
    #include <libswresample/swresample.h>
}

void decode_and_encode_video(const char *output, const char *input);

int main(int argc, char *argv[]){
    if (argc < 3){
        std::cout << "Usage: " << argv[0] << " <input> <output>" << std::endl;
        return 1;
    }
    const char *input = argv[1];
    const char *output = argv[2];
    decode_and_encode_video(output, input);
    return 0;
}

void decode_and_encode_video(const char *output, const char *input){
    AVFormatContext *inputFmtContxt = NULL;
    AVFormatContext *outputFmtContxt = NULL;
    const AVCodec *encoder = NULL;
    const AVCodec *decoder = NULL;
    AVCodecContext *encoderContxt = NULL;
    AVCodecContext *decoderContxt = NULL;
    int ret = 0, video_stream_index = 0;
    ret = avformat_open_input(&inputFmtContxt, input, NULL, NULL);
    if (ret < 0){
        std::cout << "Could not open input video" << std::endl;
    }
    ret = avformat_find_stream_info(inputFmtContxt, NULL);
    if (ret < 0){
        std::cout << "Could not find the stream info" << std::endl;
    }
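    //Allocate the muxer context for the mp4 output container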
    const AVOutputFormat *outFmt = av_guess_format("mp4", NULL, NULL);
    avformat_alloc_output_context2(&outputFmtContxt, outFmt, NULL, NULL);
    //Set up the decoder and encoder
    for (int i=0; i<(int)inputFmtContxt->nb_streams; ++i){
        AVStream *in_stream = inputFmtContxt->streams[i];
        AVCodecParameters *in_par = in_stream->codecpar;
        AVStream *out_stream = avformat_new_stream(outputFmtContxt, NULL);
        if (in_par->codec_type == AVMEDIA_TYPE_VIDEO){
            video_stream_index = i;
            decoder = avcodec_find_decoder(in_par->codec_id);
            decoderContxt = avcodec_alloc_context3(decoder);
            avcodec_parameters_to_context(decoderContxt, in_par);
            decoderContxt->framerate = in_stream->r_frame_rate;
            decoderContxt->time_base = in_stream->time_base;
            avcodec_open2(decoderContxt, decoder, NULL);
            encoder = avcodec_find_encoder(in_par->codec_id);
            encoderContxt = avcodec_alloc_context3(encoder);
            encoderContxt->height = decoderContxt->height;
            encoderContxt->width = decoderContxt->width;
            encoderContxt->pix_fmt = decoderContxt->pix_fmt;
            encoderContxt->qmax = 31;
            encoderContxt->qmin = 2;
            encoderContxt->qcompress = 0.6;
            encoderContxt->max_qdiff = 4;
            encoderContxt->gop_size = 250;
            encoderContxt->keyint_min = 25;
            encoderContxt->max_b_frames = 16;
            encoderContxt->refs = 6;
            encoderContxt->framerate = in_stream->r_frame_rate;
            encoderContxt->time_base = in_stream->time_base;
            encoderContxt->bit_rate = decoderContxt->bit_rate;
            if (outputFmtContxt->oformat->flags & AVFMT_GLOBALHEADER){
                encoderContxt->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
            }
            av_opt_set(encoderContxt->priv_data, "preset", "fast", 0);
            av_opt_set(encoderContxt->priv_data, "tune", "zerolatency", 0);
            avcodec_open2(encoderContxt, encoder, NULL);
            out_stream->time_base = encoderContxt->time_base;
            avcodec_parameters_from_context(out_stream->codecpar, encoderContxt);
        }
        else{
            ret = avcodec_parameters_copy(out_stream->codecpar, in_par);
        }
    }
    //Prepare the output file
    av_dump_format(outputFmtContxt, 0, output, 1);
    avio_open(&outputFmtContxt->pb, output, AVIO_FLAG_WRITE);
    ret = avformat_write_header(outputFmtContxt, NULL);
    //For conversion between YUV and BGR
    enum AVPixelFormat bgr_pix_fmt = AV_PIX_FMT_BGR24;
    int HEIGHT = decoderContxt->height;
    int WIDTH = decoderContxt->width;
    SwsContext *yuv2bgr = sws_getContext(WIDTH, HEIGHT, decoderContxt->pix_fmt, 
                                          WIDTH, HEIGHT, bgr_pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
    SwsContext *bgr2yuv = sws_getContext(WIDTH, HEIGHT, bgr_pix_fmt,
                                          WIDTH, HEIGHT, encoderContxt->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
    //Prepare the packets and frames
    int res = 0;
    AVPacket *packet = av_packet_alloc();
    AVPacket *out_packet = av_packet_alloc();
    out_packet->data = NULL;
    out_packet->size = 0;
    // Frame received from the decoder
    AVFrame *frame = av_frame_alloc();
    // Destination frame for the BGR conversion
    AVFrame *bgrframe = av_frame_alloc();
    bgrframe->width = decoderContxt->width;
    bgrframe->height = decoderContxt->height;
    bgrframe->format = bgr_pix_fmt;
    // av_frame_get_buffer() allocates the data planes itself, so no separate
    // av_malloc()/av_image_fill_arrays() buffer is needed (allocating both leaks one)
    ret = av_frame_get_buffer(bgrframe, 0);
    // Frame handed to the encoder
    AVFrame *outframe = av_frame_alloc();
    outframe->width = decoderContxt->width;
    outframe->height = decoderContxt->height;
    outframe->format = encoderContxt->pix_fmt;
    ret = av_frame_get_buffer(outframe, 0);
    //Start decoding and encoding
    while (true){
        ret = av_read_frame(inputFmtContxt, packet);
        if (ret < 0){
            break;
        }
        AVStream *input_stream = inputFmtContxt->streams[packet->stream_index];
        AVStream *output_stream = outputFmtContxt->streams[packet->stream_index];
        if (packet->stream_index == video_stream_index){
            res = avcodec_send_packet(decoderContxt, packet);
            while (res >= 0){
                res = avcodec_receive_frame(decoderContxt, frame);
                if (res == AVERROR(EAGAIN) || res == AVERROR_EOF){
                    break;
                }
                if (res >= 0){
                    outframe->pict_type = frame->pict_type;
                    outframe->pts = frame->pts;
                    outframe->pkt_dts = frame->pkt_dts;
                    outframe->pkt_duration = frame->pkt_duration;
                    ret = av_frame_make_writable(outframe);
                    //Convert the YUV frame to a BGR frame
                    sws_scale(yuv2bgr, frame->data, frame->linesize, 0, frame->height, bgrframe->data, bgrframe->linesize);
                    //Negative-positive inversion
                    int h = bgrframe->height;
                    int w = bgrframe->width * 3;   // 3 bytes per BGR pixel
                    int l = bgrframe->linesize[0]; // row stride, possibly padded
                    for (int i=0; i<h; ++i){
                        for (int j=0; j<w; ++j){
                            bgrframe->data[0][i * l + j] = 255 - bgrframe->data[0][i * l + j];
                        }
                    }
                    //Convert back from BGR to YUV
                    sws_scale(bgr2yuv, bgrframe->data, bgrframe->linesize, 0, h, outframe->data, outframe->linesize);
                    res = avcodec_send_frame(encoderContxt, outframe);
                    //Write out the encoded packets
                    while (res >= 0){
                        res = avcodec_receive_packet(encoderContxt, out_packet);
                        if (res == AVERROR(EAGAIN) || res == AVERROR_EOF){
                            break;
                        }
                        out_packet->stream_index = packet->stream_index;
                        //Rescale pts/dts/duration from the encoder time base to the muxer time base
                        av_packet_rescale_ts(out_packet, encoderContxt->time_base, output_stream->time_base);
                        res = av_interleaved_write_frame(outputFmtContxt, out_packet);
                    }
                    av_packet_unref(out_packet);
                }
                av_frame_unref(frame);
            }
            av_packet_unref(packet);
        }
        else{
            //Write audio (and any other) packets through unchanged
            packet->pts = av_rescale_q_rnd(packet->pts, input_stream->time_base, output_stream->time_base, AV_ROUND_NEAR_INF);
            packet->dts = av_rescale_q_rnd(packet->dts, input_stream->time_base, output_stream->time_base, AV_ROUND_NEAR_INF);
            packet->duration = av_rescale_q(packet->duration, input_stream->time_base, output_stream->time_base);
            res = av_interleaved_write_frame(outputFmtContxt, packet);
            av_packet_unref(packet);
        }
    }
    //Finalize the output file and free all resources
    av_write_trailer(outputFmtContxt);
    av_packet_free(&packet);
    av_packet_free(&out_packet);
    av_frame_free(&frame);
    av_frame_free(&bgrframe);
    av_frame_free(&outframe);
    avcodec_free_context(&decoderContxt);
    avcodec_free_context(&encoderContxt);
    avformat_close_input(&inputFmtContxt);
    avio_closep(&outputFmtContxt->pb);
    avformat_free_context(outputFmtContxt);
    sws_freeContext(yuv2bgr);
    sws_freeContext(bgr2yuv);
}
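
Note that this test code stops as soon as av_read_frame() hits EOF and never drains the decoder or the encoder; with max_b_frames set to 16, the encoder in particular can still be holding quite a few frames at that point. A minimal flushing sketch, reusing the variable names from the function above and meant to run just before av_write_trailer(), might look like this:

// Drain the decoder: a NULL packet puts it into flush mode
avcodec_send_packet(decoderContxt, NULL);
while (avcodec_receive_frame(decoderContxt, frame) >= 0){
    // ...convert, invert, and avcodec_send_frame() exactly as in the main loop...
    av_frame_unref(frame);
}
// Drain the encoder: a NULL frame puts it into flush mode
avcodec_send_frame(encoderContxt, NULL);
while (avcodec_receive_packet(encoderContxt, out_packet) >= 0){
    out_packet->stream_index = video_stream_index;
    av_packet_rescale_ts(out_packet, encoderContxt->time_base,
                         outputFmtContxt->streams[video_stream_index]->time_base);
    av_interleaved_write_frame(outputFmtContxt, out_packet);
    av_packet_unref(out_packet);
}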