FFmpeg APIと FreeType API を使用してアニメーションを作る

FreeType というのはフォントのレンダリングを行うためのライブラリで、iOSやAndroidなどで使われているようです(公式サイトより)。


使用したフォントは NotoSansMono-Regular.ttf。

#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cassert>
#include <vector>
#include <string>
#include <ft2build.h>
#include <freetype/ftbitmap.h>
#include FT_FREETYPE_H
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

int main(int argc, char *argv[]){
    std::string chars_to_use = "abcd;#&@";
    std::vector<char> char_list(chars_to_use.size());
    for (int i=0; i<(int)chars_to_use.size(); ++i){
        char_list[i] = chars_to_use[i];
    const char *font = "NotoSansMono-Regular.ttf";
    int width = atoi(argv[1]);
    FT_Library library;
    FT_Face face;
    FT_GlyphSlot slot;
    FT_Error error;
    error = FT_Init_FreeType(&library);
    if (error){
        std::cout << "error" << std::endl;
    error = FT_New_Face(library, font, 0, &face);
    if (error == FT_Err_Unknown_File_Format){
        std::cout << "unknown format" << std::endl;
    else if(error){
        std::cout << "error" << std::endl;
    error = FT_Select_Charmap(face, FT_ENCODING_UNICODE);
    if (error){
        std::cout << "Charmap error" << std::endl;
    double ratio = (double) 3 / 3;
    int height = (int)round(ratio * (double)width);
    error = FT_Set_Pixel_Sizes(face, width, height);
    if (error){
        std::cout << "size setting error" << std::endl;
    int num_char = (int)char_list.size();
    std::vector<int> top_list(num_char);
    std::vector<int> bottom_list(num_char);
    for (int i=0; i<num_char; ++i){
        FT_ULong ch = char_list[i];
        FT_UInt index = FT_Get_Char_Index(face, ch);
        error = FT_Load_Glyph(face, index, FT_LOAD_RENDER);
        if (error){
            std::cout << "glyph loading error" << std::endl;
        slot = face->glyph;
        FT_Render_Glyph(slot, FT_RENDER_MODE_MONO);
        int h = slot->bitmap.rows;
        int t = slot->bitmap_top;
        int b = h - t;
        top_list[i] = t;
        bottom_list[i] = b;
        FT_Bitmap_Done(library, &slot->bitmap);
    int max_top = *std::max_element(top_list.begin(), top_list.end());
    int max_bottom = *std::max_element(bottom_list.begin(), bottom_list.end());
    int y_blanc = 10;
    int base_line = max_top + y_blanc;
    int window_w = width;
    int window_h = max_top + max_bottom + 2 * y_blanc;
    cv::Mat Window = cv::Mat(window_h, window_w*num_char, CV_8UC1, cv::Scalar(0));
    for (int i=0; i<num_char; ++i){
        FT_ULong ch = char_list[i];
        FT_UInt index = FT_Get_Char_Index(face, ch);
        error = FT_Load_Glyph(face, index, FT_LOAD_RENDER);
        if (error){
            std::cout << "glyph loading error" << std::endl;
        slot = face->glyph;
        FT_Render_Glyph(slot, FT_RENDER_MODE_MONO);
        int w = slot->bitmap.width;
        int h = slot->bitmap.rows;
        int t = slot->bitmap_top;
        int l = slot->bitmap_left;
        std::vector<uchar> buf(w*h);
        for (int p=0; p<w*h; ++p){
            if (slot->bitmap.buffer[p] == 0){
                buf[p] = (uchar) 0;
                buf[p] = (uchar) 255;
        cv::Mat C = cv::Mat(h, w, CV_8UC1, buf.data());
        cv::Mat R = cv::Mat(window_h, window_w, CV_8UC1, cv::Scalar(0));
        assert(l + w <= window_w);
        assert(base_line - t >= 0);
        assert(base_line - t + h <= window_h);
        C.copyTo(R.colRange(l, l + w).rowRange(base_line - t, base_line - t + h));
        R.copyTo(Window.colRange(i*window_w, (i+1)*window_w));
        FT_Bitmap_Done(library, &slot->bitmap);
    cv::imshow("test", Window);
    cv::imwrite("redering_test.jpg", Window);
    return 0;



前回の記事でFFmpeg の API を使ってビデオのデコードとエンコードをするコードを書いたのですが、前回の記事とFreeTypeを組み合わせて簡単なアニメーションを作ってみようと思います。今回はデコーダーは使わず、FreeTypeでレンダリングした文字データをそのままエンコーダーに渡します。
(2021/09/17 追記:メモリ解放関連の箇所を修正しました。)

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>
#include <ft2build.h>
#include <freetype/ftbitmap.h>
#include FT_FREETYPE_H
extern "C"{
    #include <libavformat/avformat.h>
    #include <libavcodec/avcodec.h>
    #include <libavutil/imgutils.h>
    #include <libavutil/opt.h>
    #include <libswscale/swscale.h>
    #include <libswresample/swresample.h>
}

/**
 * Renders `char_to_rotate` with FreeType and fills every element of `frames`
 * with one RGB24 image of the glyph rotated a little further than in the
 * previous element (frame t is rotated by -t * angular_velocity radians).
 *
 * @param frames           one entry per frame to generate; each entry is resized and overwritten
 * @param char_to_rotate   character whose glyph is drawn
 * @param CHAR_WIDTH       glyph pixel width passed to FT_Set_Pixel_Sizes
 * @param CHAR_HEIGHT      glyph pixel height passed to FT_Set_Pixel_Sizes
 * @param angular_velocity rotation step per frame, in radians
 * @return the side length in pixels of the square frames that were produced
 */
int make_frames(std::vector<std::vector<uint8_t>> &frames, const char char_to_rotate, const int &CHAR_WIDTH, const int &CHAR_HEIGHT,
                const double &angular_velocity);

int main(int argc, char *argv[]){
    char char_to_rotate = argv[1][0];
    const char *output = argv[2];
    int num_frames = 300;
    int CHAR_WIDTH = 500;
    double ratio = 1.0;
    int CHAR_HEIGHT = (int) round(ratio * (double)CHAR_WIDTH);
    std::vector<std::vector<uint8_t>> frames(num_frames);
    int s;
    double angular_velocity = 3.0 / 30.0;
    s = make_frames(frames, char_to_rotate, CHAR_WIDTH, CHAR_HEIGHT, angular_velocity);
    AVFormatContext *outputFmtContxt = NULL;
    const AVCodec *encoder = NULL;
    AVCodecContext *encoderContxt = NULL;
    int ret = 0;
    const AVOutputFormat *outFmt = av_guess_format("mp4", NULL, NULL);
    avformat_alloc_output_context2(&outputFmtContxt, outFmt, NULL, NULL);
    int FR = 30;
    int pts_inc = 1000;
    enum AVPixelFormat pix_fmt = AV_PIX_FMT_YUV420P;
    AVRational fps = av_make_q(FR, 1);
    AVStream *out_stream = avformat_new_stream(outputFmtContxt, NULL);
    encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
    encoderContxt = avcodec_alloc_context3(encoder);
    encoderContxt->height = s;
    encoderContxt->width = s;
    encoderContxt->pix_fmt = pix_fmt;
    encoderContxt->gop_size = 250;
    encoderContxt->keyint_min = 25;
    encoderContxt->qmax = 10;
    encoderContxt->bit_rate = 3000000;
    encoderContxt->framerate = fps;
    encoderContxt->time_base = av_make_q(1, pts_inc * FR);
    av_opt_set(encoderContxt->priv_data, "tune", "zerolatency", 0);
    avcodec_open2(encoderContxt, encoder, NULL);
    avcodec_parameters_from_context(out_stream->codecpar, encoderContxt);
    av_dump_format(outputFmtContxt, 0, output, 1);
    avio_open(&outputFmtContxt->pb, output, AVIO_FLAG_WRITE);
    ret = avformat_write_header(outputFmtContxt, NULL);
    SwsContext *rgb2yuv = sws_getContext(s, s, AV_PIX_FMT_RGB24, s, s, pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
    AVPacket *packet = av_packet_alloc();
    packet->data = NULL;
    packet->size = 0;
    AVFrame *rgbframe = av_frame_alloc();
    rgbframe->width = s;
    rgbframe->height = s;
    rgbframe->format = AV_PIX_FMT_RGB24;
    rgbframe->pkt_duration = 1;
    ret = av_frame_get_buffer(rgbframe, 0);
    AVFrame *outframe = av_frame_alloc();
    outframe->width = s;
    outframe->height = s;
    outframe->format = pix_fmt;
    outframe->pkt_duration = pts_inc;
    ret = av_frame_get_buffer(outframe, 0);
    uint8_t *outbuf = (uint8_t*) av_malloc(av_image_get_buffer_size(pix_fmt, s, s, 1));
    ret = av_image_fill_arrays(outframe->data, outframe->linesize, outbuf, pix_fmt, s, s, 1);
    int64_t pts = 0;
    for (int f=0; f<(int) frames.size(); ++f){ 
        outframe->pict_type = AV_PICTURE_TYPE_NONE;
        outframe->key_frame = 1;
        outframe->pts = pts;
        pts += (uint64_t) pts_inc;
        ret = av_frame_make_writable(outframe);
        if (ret < 0){
            std::cout << "frame unwritable" << std::endl;
        ret = av_image_fill_arrays(rgbframe->data, rgbframe->linesize, frames[f].data(), AV_PIX_FMT_RGB24, s, s, 1);
        sws_scale(rgb2yuv, rgbframe->data, rgbframe->linesize, 0, s, outframe->data, outframe->linesize);
        ret = avcodec_send_frame(encoderContxt, outframe);
        if (ret < 0){break;}
        while (ret >= 0){
            ret = avcodec_receive_packet(encoderContxt, packet);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF || ret < 0){
            packet->pts = outframe->pts;
            packet->dts = packet->dts;
            packet->duration = pts_inc;
            ret = av_interleaved_write_frame(outputFmtContxt, packet);
    return 0;

int make_frames(std::vector<std::vector<uint8_t>> &frames, const char char_to_rotate, const int &CHAR_WIDTH, const int &CHAR_HEIGHT,
                const double &angular_velocity){
    const char *font = "NotoSansMono-Regular.ttf";
    int width = CHAR_WIDTH, height = CHAR_HEIGHT;
    FT_Library library;
    FT_Face face;
    FT_GlyphSlot slot;
    FT_Error error;
    error = FT_Init_FreeType(&library);
    if (error){
        std::cout << "error" << std::endl;
    error = FT_New_Face(library, font, 0, &face);
    if (error == FT_Err_Unknown_File_Format){
        std::cout << "unknown format" << std::endl;
    else if(error){
        std::cout << "error" << std::endl;
    error = FT_Select_Charmap(face, FT_ENCODING_UNICODE);
    if (error){
        std::cout << "Charmap error" << std::endl;
    error = FT_Set_Pixel_Sizes(face, width, height);
    if (error){
        std::cout << "size setting error" << std::endl;
    FT_ULong ch = char_to_rotate;
    FT_UInt index = FT_Get_Char_Index(face, ch);
    error = FT_Load_Glyph(face, index, FT_LOAD_RENDER);
    if (error){
        std::cout << "glyph loading error" << std::endl;
    slot = face->glyph;
    FT_Render_Glyph(slot, FT_RENDER_MODE_MONO);
    int w = slot->bitmap.width;
    int h = slot->bitmap.rows;
    std::vector<uint8_t> buf(w*h);
    for (int p=0; p<w*h; ++p){
        if (slot->bitmap.buffer[p] == 0){
            buf[p] = (uint8_t) 0;
            buf[p] = (uint8_t) 255;
    FT_Bitmap_Done(library, &slot->bitmap);
    double c_x = w / 2.0;
    double c_y = h / 2.0;
    double L = c_x * c_x + c_y * c_y;
    int l = round(sqrt(L));
    int num_frames = (int) frames.size();
    int D_X = (2*l - w) / 2, D_Y = (2*l - h) / 2;
    for (int t=0; t<num_frames; ++t){
        frames[t].resize(3 * 4 * l * l);
        std::fill(frames[t].begin(), frames[t].end(), 0);
        double theta = - t * angular_velocity;
        for (int i=0; i<h; ++i){
            double y = l - (D_Y + i);
            for (int j=0; j<w; ++j){
                double x = l - (D_X + j);
                int Y = round(x * sin(theta) + y * cos(theta));
                int X = round(x * cos(theta) - y * sin(theta));
                int I = l - Y;
                int J = l - X;
                if (buf[i * w + j] == 255){
                    frames[t][3 * (I * 2 * l + J)] = 255;
                    frames[t][3 * (I * 2 * l + J) + 1] = 255;
                    frames[t][3 * (I * 2 * l + J) + 2] = 255;
        for (int i=1; i<2*l-1; ++i){
            for (int j=1; j<2*l-1; ++j){
                if (frames[t][3 * (i * 2 * l + j)] == 0){
                    if (frames[t][3 * (i * 2 * l + j - 1)] == 255 && frames[t][3 * (i * 2 * l + j + 1)] == 255){
                        frames[t][3 * (i * 2 * l + j)] = 255;
                        frames[t][3 * (i * 2 * l + j) + 1] = 255;
                        frames[t][3 * (i * 2 * l + j) + 2] = 255;
    return 2 * l; 



