More than 3 years have passed since last update.

FFmpeg API と FreeType API を使用してアニメーションを作る

Last updated at 2021-09-16 / Posted at 2021-09-15

はじめに

FreeType というのはフォントのレンダリングを行うためのライブラリで、iOSやAndroidなどで使われているようです(公式サイトより)。
今回はこのライブラリを使ってフォントのレンダリングを行ってみました。
## フォントのレンダリング
まずはFreeTypeでフォントファイルを読み込んでOpenCVで表示するコードから。
使用したフォントは NotoSansMono-Regular.ttf。

#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cassert>
#include <vector>
#include <string>
#include <ft2build.h>
#include <freetype/ftbitmap.h>
#include FT_FREETYPE_H
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

int main(int argc, char *argv[]){
    //レンダリングする文字
    std::string chars_to_use = "abcd;#&@";
    std::vector<char> char_list(chars_to_use.size());
    for (int i=0; i<(int)chars_to_use.size(); ++i){
        char_list[i] = chars_to_use[i];
    }
    //フォントファイル
    const char *font = "NotoSansMono-Regular.ttf";
    //文字の横幅
    int width = atoi(argv[1]);
    //各初期化
    FT_Library library;
    FT_Face face;
    FT_GlyphSlot slot;
    FT_Error error;
    error = FT_Init_FreeType(&library);
    if (error){
        std::cout << "error" << std::endl;
    }
    error = FT_New_Face(library, font, 0, &face);
    if (error == FT_Err_Unknown_File_Format){
        std::cout << "unknown format" << std::endl;
    }
    else if(error){
        std::cout << "error" << std::endl;
    }
    error = FT_Select_Charmap(face, FT_ENCODING_UNICODE);
    if (error){
        std::cout << "Charmap error" << std::endl;
    }
    //横幅に対する縦幅の設定
    double ratio = (double) 3 / 3;
    int height = (int)round(ratio * (double)width);
    error = FT_Set_Pixel_Sizes(face, width, height);
    if (error){
        std::cout << "size setting error" << std::endl;
    }
    //レンダリングの準備
    int num_char = (int)char_list.size();
    std::vector<int> top_list(num_char);
    std::vector<int> bottom_list(num_char);
    for (int i=0; i<num_char; ++i){
        FT_ULong ch = char_list[i];
        FT_UInt index = FT_Get_Char_Index(face, ch);
        error = FT_Load_Glyph(face, index, FT_LOAD_RENDER);
        if (error){
            std::cout << "glyph loading error" << std::endl;
        }
        slot = face->glyph;
        FT_Render_Glyph(slot, FT_RENDER_MODE_MONO);
        int h = slot->bitmap.rows;
        int t = slot->bitmap_top;
        int b = h - t;
        top_list[i] = t;
        bottom_list[i] = b;
        FT_Bitmap_Done(library, &slot->bitmap);
    }
    int max_top = *std::max_element(top_list.begin(), top_list.end());
    int max_bottom = *std::max_element(bottom_list.begin(), bottom_list.end());
    int y_blanc = 10;
    int base_line = max_top + y_blanc;
    //表示ウィンドウの設定
    int window_w = width;
    int window_h = max_top + max_bottom + 2 * y_blanc;
    cv::Mat Window = cv::Mat(window_h, window_w*num_char, CV_8UC1, cv::Scalar(0));
    //レンダリング開始
    for (int i=0; i<num_char; ++i){
        FT_ULong ch = char_list[i];
        FT_UInt index = FT_Get_Char_Index(face, ch);
        error = FT_Load_Glyph(face, index, FT_LOAD_RENDER);
        if (error){
            std::cout << "glyph loading error" << std::endl;
        }
        slot = face->glyph;
        FT_Render_Glyph(slot, FT_RENDER_MODE_MONO);
        int w = slot->bitmap.width;
        int h = slot->bitmap.rows;
        int t = slot->bitmap_top;
        int l = slot->bitmap_left;
        std::vector<uchar> buf(w*h);
        for (int p=0; p<w*h; ++p){
            if (slot->bitmap.buffer[p] == 0){
                buf[p] = (uchar) 0;
            }
            else{
                buf[p] = (uchar) 255;
            }
        }
        cv::Mat C = cv::Mat(h, w, CV_8UC1, buf.data());
        cv::Mat R = cv::Mat(window_h, window_w, CV_8UC1, cv::Scalar(0));
        assert(l + w <= window_w);
        assert(base_line - t >= 0);
        assert(base_line - t + h <= window_h);
        C.copyTo(R.colRange(l, l + w).rowRange(base_line - t, base_line - t + h));
        R.copyTo(Window.colRange(i*window_w, (i+1)*window_w));
        FT_Bitmap_Done(library, &slot->bitmap);
    }
    //結果の表示と保存
    cv::imshow("test", Window);
    cv::waitKey(0);
    cv::imwrite("redering_test.jpg", Window);
    FT_Done_Face(face);
    FT_Done_FreeType(library);
    return 0;
}

レンダリングされた結果がこちら。

## アニメーションを作ってみる
前回の記事でFFmpeg の API を使ってビデオのデコードとエンコードをするコードを書いたのですが、前回の記事とFreeTypeを組み合わせて簡単なアニメーションを作ってみようと思います。今回はデコーダーは使わず、FreeTypeでレンダリングした文字データをそのままエンコーダーに渡します。
(2021/09/17 追記：メモリ解放関連の箇所を修正しました。)

#include <iostream>
#include <cassert>
#include <cmath>
#include <vector>
#include <ft2build.h>
#include <freetype/ftbitmap.h>
#include FT_FREETYPE_H
extern "C"{
    #include <libavformat/avformat.h>
    #include <libavcodec/avcodec.h>
    #include <libavutil/imgutils.h>
    #include <libavutil/opt.h>
    #include <libswscale/swscale.h>
    #include <libswresample/swresample.h>
}

// Forward declaration; the definition follows main().
// Fills `frames` (one byte buffer per frame) for the character `char_to_rotate`, given the
// requested glyph size and the rotation step `angular_velocity`. main() uses the returned
// int as both the width and the height of the encoded video.
int make_frames(std::vector<std::vector<uint8_t>> &frames, const char char_to_rotate, const int &CHAR_WIDTH, const int &CHAR_HEIGHT,
                const double &angular_velocity);

int main(int argc, char *argv[]){
    //使う文字
    char char_to_rotate = argv[1][0];
    //出力ファイル
    const char *output = argv[2];
    //フレーム数
    int num_frames = 300;
    //文字サイズ
    int CHAR_WIDTH = 500;
    double ratio = 1.0;
    int CHAR_HEIGHT = (int) round(ratio * (double)CHAR_WIDTH);
    std::vector<std::vector<uint8_t>> frames(num_frames);
    int s;
    //文字の回転角速度
    double angular_velocity = 3.0 / 30.0;
    //フレーム作成
    s = make_frames(frames, char_to_rotate, CHAR_WIDTH, CHAR_HEIGHT, angular_velocity);
    AVFormatContext *outputFmtContxt = NULL;
    const AVCodec *encoder = NULL;
    AVCodecContext *encoderContxt = NULL;
    int ret = 0;
    const AVOutputFormat *outFmt = av_guess_format("mp4", NULL, NULL);
    avformat_alloc_output_context2(&outputFmtContxt, outFmt, NULL, NULL);
    //エンコーダ―の設定
    int FR = 30;
    int pts_inc = 1000;
    enum AVPixelFormat pix_fmt = AV_PIX_FMT_YUV420P;
    AVRational fps = av_make_q(FR, 1);
    AVStream *out_stream = avformat_new_stream(outputFmtContxt, NULL);
    encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
    encoderContxt = avcodec_alloc_context3(encoder);
    encoderContxt->height = s;
    encoderContxt->width = s;
    encoderContxt->pix_fmt = pix_fmt;
    encoderContxt->gop_size = 250;
    encoderContxt->keyint_min = 25;
    encoderContxt->qmax = 10;
    encoderContxt->bit_rate = 3000000;
    encoderContxt->framerate = fps;
    encoderContxt->time_base = av_make_q(1, pts_inc * FR);
    av_opt_set(encoderContxt->priv_data, "tune", "zerolatency", 0);
    avcodec_open2(encoderContxt, encoder, NULL);
    avcodec_parameters_from_context(out_stream->codecpar, encoderContxt);
    //出力ファイルを準備
    av_dump_format(outputFmtContxt, 0, output, 1);
    avio_open(&outputFmtContxt->pb, output, AVIO_FLAG_WRITE);
    ret = avformat_write_header(outputFmtContxt, NULL);
    //YUVとRGBの変換用
    SwsContext *rgb2yuv = sws_getContext(s, s, AV_PIX_FMT_RGB24, s, s, pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
    //パケットとフレームを準備
    AVPacket *packet = av_packet_alloc();
    packet->data = NULL;
    packet->size = 0;
    AVFrame *rgbframe = av_frame_alloc();
    rgbframe->width = s;
    rgbframe->height = s;
    rgbframe->format = AV_PIX_FMT_RGB24;
    rgbframe->pkt_duration = 1;
    ret = av_frame_get_buffer(rgbframe, 0);
    AVFrame *outframe = av_frame_alloc();
    outframe->width = s;
    outframe->height = s;
    outframe->format = pix_fmt;
    outframe->pkt_duration = pts_inc;
    ret = av_frame_get_buffer(outframe, 0);
    uint8_t *outbuf = (uint8_t*) av_malloc(av_image_get_buffer_size(pix_fmt, s, s, 1));
    ret = av_image_fill_arrays(outframe->data, outframe->linesize, outbuf, pix_fmt, s, s, 1);
    //エンコード開始
    int64_t pts = 0;
    for (int f=0; f<(int) frames.size(); ++f){ 
        outframe->pict_type = AV_PICTURE_TYPE_NONE;
        outframe->key_frame = 1;
        outframe->pts = pts;
        pts += (uint64_t) pts_inc;
        ret = av_frame_make_writable(outframe);
        if (ret < 0){
            std::cout << "frame unwritable" << std::endl;
            break;
        }
        ret = av_image_fill_arrays(rgbframe->data, rgbframe->linesize, frames[f].data(), AV_PIX_FMT_RGB24, s, s, 1);
        sws_scale(rgb2yuv, rgbframe->data, rgbframe->linesize, 0, s, outframe->data, outframe->linesize);
        ret = avcodec_send_frame(encoderContxt, outframe);
        if (ret < 0){break;}
        while (ret >= 0){
            ret = avcodec_receive_packet(encoderContxt, packet);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF || ret < 0){
                break;
            }
            packet->pts = outframe->pts;
            packet->dts = packet->dts;
            packet->duration = pts_inc;
            ret = av_interleaved_write_frame(outputFmtContxt, packet);
        }
        av_packet_unref(packet);
    }
    //各メモリの解放
    av_write_trailer(outputFmtContxt);
    av_packet_free(&packet);
    av_frame_free(&rgbframe);
    av_frame_free(&outframe);
    avcodec_free_context(&encoderContxt);
    avformat_free_context(outputFmtContxt);
    av_freep(&outbuf);
    sws_freeContext(rgb2yuv);
    return 0;
}

//フレームの作成関数
int make_frames(std::vector<std::vector<uint8_t>> &frames, const char char_to_rotate, const int &CHAR_WIDTH, const int &CHAR_HEIGHT,
                const double &angular_velocity){
    const char *font = "NotoSansMono-Regular.ttf";
    int width = CHAR_WIDTH, height = CHAR_HEIGHT;
    //フリータイプの初期化
    FT_Library library;
    FT_Face face;
    FT_GlyphSlot slot;
    FT_Error error;
    error = FT_Init_FreeType(&library);
    if (error){
        std::cout << "error" << std::endl;
    }
    error = FT_New_Face(library, font, 0, &face);
    if (error == FT_Err_Unknown_File_Format){
        std::cout << "unknown format" << std::endl;
    }
    else if(error){
        std::cout << "error" << std::endl;
    }
    error = FT_Select_Charmap(face, FT_ENCODING_UNICODE);
    if (error){
        std::cout << "Charmap error" << std::endl;
    }
    error = FT_Set_Pixel_Sizes(face, width, height);
    if (error){
        std::cout << "size setting error" << std::endl;
    }
    //文字をレンダリング
    FT_ULong ch = char_to_rotate;
    FT_UInt index = FT_Get_Char_Index(face, ch);
    error = FT_Load_Glyph(face, index, FT_LOAD_RENDER);
    if (error){
        std::cout << "glyph loading error" << std::endl;
    }
    slot = face->glyph;
    FT_Render_Glyph(slot, FT_RENDER_MODE_MONO);
    int w = slot->bitmap.width;
    int h = slot->bitmap.rows;
    std::vector<uint8_t> buf(w*h);
    for (int p=0; p<w*h; ++p){
        if (slot->bitmap.buffer[p] == 0){
            buf[p] = (uint8_t) 0;
        }
        else{
            buf[p] = (uint8_t) 255;
        }
    }
    FT_Bitmap_Done(library, &slot->bitmap);
    FT_Done_Face(face);
    FT_Done_FreeType(library);
    //フレーム作成
    double c_x = w / 2.0;
    double c_y = h / 2.0;
    double L = c_x * c_x + c_y * c_y;
    int l = round(sqrt(L));
    int num_frames = (int) frames.size();
    int D_X = (2*l - w) / 2, D_Y = (2*l - h) / 2;
    for (int t=0; t<num_frames; ++t){
        frames[t].resize(3 * 4 * l * l);
        std::fill(frames[t].begin(), frames[t].end(), 0);
        double theta = - t * angular_velocity;
        for (int i=0; i<h; ++i){
            double y = l - (D_Y + i);
            for (int j=0; j<w; ++j){
                double x = l - (D_X + j);
                int Y = round(x * sin(theta) + y * cos(theta));
                int X = round(x * cos(theta) - y * sin(theta));
                int I = l - Y;
                int J = l - X;
                if (buf[i * w + j] == 255){
                    frames[t][3 * (I * 2 * l + J)] = 255;
                    frames[t][3 * (I * 2 * l + J) + 1] = 255;
                    frames[t][3 * (I * 2 * l + J) + 2] = 255;
                }
            }
        }
        for (int i=1; i<2*l-1; ++i){
            for (int j=1; j<2*l-1; ++j){
                if (frames[t][3 * (i * 2 * l + j)] == 0){
                    if (frames[t][3 * (i * 2 * l + j - 1)] == 255 && frames[t][3 * (i * 2 * l + j + 1)] == 255){
                        frames[t][3 * (i * 2 * l + j)] = 255;
                        frames[t][3 * (i * 2 * l + j) + 1] = 255;
                        frames[t][3 * (i * 2 * l + j) + 2] = 255;
                    }
                }
            }
        }
    }
    return 2 * l; 
}

出来上がったアニメーションがこちら。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up