More than 3 years have passed since last update.
SDL と FFmpeg でつくるビデオプレイヤー

Posted at 2022-01-05
単なる備忘録
映像と音声を一方的に垂れ流すだけのポンコツ品です
# include <iostream>
# define SDL_MAIN_HANDLED
# include <SDL2/SDL.h>
extern "C"{
    #include <libavformat/avformat.h>
    #include <libavcodec/avcodec.h>
    #include <libavutil/imgutils.h>
    #include <libavutil/opt.h>
    #include <libswscale/swscale.h>
    #include <libswresample/swresample.h>
}

int main(int argc, char *argv[]){
    // MP4 ファイルのパス
    const char *input = argv[1];
    int ret = 0;
    AVFormatContext *inputFmtCtx = NULL;
    ret = avformat_open_input(&inputFmtCtx, input, NULL, NULL);
    if (ret < 0){
        std::cout << "Could not open input file" << std::endl;
    }
    ret = avformat_find_stream_info(inputFmtCtx, NULL);
    if (ret < 0){
        std::cout << "Could not find stream info" << std::endl;
    }
    int video_stream_index = -1;
    int audio_stream_index = -1;
    const AVCodec *videoDecoder = NULL;
    const AVCodec *audioDecoder = NULL;
    AVCodecContext *videoDecodeCtx = NULL;
    AVCodecContext *audioDecodeCtx = NULL;
    for (int i=0; i<(int)inputFmtCtx->nb_streams; ++i){
        AVStream *stream = inputFmtCtx->streams[i];
        AVCodecParameters *codecpar = stream->codecpar;
        if (codecpar->codec_type == AVMEDIA_TYPE_VIDEO){
            video_stream_index = i;
            videoDecoder = avcodec_find_decoder(codecpar->codec_id);
            videoDecodeCtx = avcodec_alloc_context3(videoDecoder);
            avcodec_parameters_to_context(videoDecodeCtx, codecpar);
            videoDecodeCtx->time_base = stream->time_base;
            videoDecodeCtx->framerate = stream->avg_frame_rate;
            avcodec_open2(videoDecodeCtx, videoDecoder, NULL);
        }
        if (codecpar->codec_type == AVMEDIA_TYPE_AUDIO){
            audio_stream_index = i;
            audioDecoder = avcodec_find_decoder(codecpar->codec_id);
            audioDecodeCtx = avcodec_alloc_context3(audioDecoder);
            avcodec_parameters_to_context(audioDecodeCtx, codecpar);
            audioDecodeCtx->time_base = stream->time_base;
            avcodec_open2(audioDecodeCtx, audioDecoder, NULL);
        }
    }
    int WIDTH = videoDecodeCtx->width;
    int HEIGHT = videoDecodeCtx->height;
    int channels = audioDecodeCtx->channels;
    AVPacket *packet = av_packet_alloc();
    AVFrame *frame = av_frame_alloc();
    /*
    AVFrame *rgbframe = av_frame_alloc();
    rgbframe->width = WIDTH;
    rgbframe->height = HEIGHT;
    rgbframe->format = AV_PIX_FMT_RGB24;
    uint8_t *buf = (uint8_t*) av_malloc(av_image_get_buffer_size(AV_PIX_FMT_RGB24, WIDTH, HEIGHT, 1));
    ret = av_image_fill_arrays(rgbframe->data, rgbframe->linesize, buf, AV_PIX_FMT_RGB24, WIDTH, HEIGHT, 1);
    SwsContext *torgb = sws_getContext(WIDTH, HEIGHT, videoDecodeCtx->pix_fmt,
                                       WIDTH, HEIGHT, AV_PIX_FMT_RGB24,
                                       SWS_BICUBIC, NULL, NULL, NULL);
    */
    //double scale = 0.6;
    int Window_WIDTH = 1280;
    int Window_HEIGHT = 720;
    AVPixelFormat dst_pix_fmt = AV_PIX_FMT_RGB24;
    SwsContext *scaler = sws_getContext(WIDTH, HEIGHT, videoDecodeCtx->pix_fmt,
                                        Window_WIDTH, Window_HEIGHT, dst_pix_fmt,
                                        SWS_BICUBIC, NULL, NULL, NULL);
    int dst_samplerate = 44100;
    AVSampleFormat dst_smp_fmt = AV_SAMPLE_FMT_S16;
    SwrContext *resampler = swr_alloc_set_opts(NULL, 
                                               audioDecodeCtx->channel_layout,
                                               dst_smp_fmt,
                                               dst_samplerate,
                                               audioDecodeCtx->channel_layout,
                                               audioDecodeCtx->sample_fmt,
                                               audioDecodeCtx->sample_rate,
                                               0, 
                                               NULL);
    swr_init(resampler);
    AVFrame *dstframe = av_frame_alloc();
    dstframe->width = Window_WIDTH;
    dstframe->height = Window_HEIGHT;
    dstframe->format = dst_pix_fmt;
    ret = av_frame_get_buffer(dstframe, 0);
    uint8_t *buf = (uint8_t*) av_malloc(av_image_get_buffer_size(dst_pix_fmt, Window_WIDTH, Window_HEIGHT, 1));
    ret = av_image_fill_arrays(dstframe->data, dstframe->linesize, buf, dst_pix_fmt, Window_WIDTH, Window_HEIGHT, 1);
    AVFrame *audioframe = av_frame_alloc();
    audioframe->channel_layout = audioDecodeCtx->channel_layout;
    audioframe->sample_rate = dst_samplerate;
    audioframe->format = dst_smp_fmt;
    ret = av_frame_get_buffer(audioframe, 0);
    //collect frames
    //SDL part
    if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO) < 0){
        std::cout << "Cound not initialize SDL2" << std::endl;
    }
    SDL_Window *window = SDL_CreateWindow(input,
                                          SDL_WINDOWPOS_CENTERED,
                                          SDL_WINDOWPOS_CENTERED,
                                          Window_WIDTH, Window_HEIGHT, 0);
    SDL_Renderer *renderer = SDL_CreateRenderer(window, -1, 0);
    SDL_Texture *texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_RGB24,
                                             SDL_TEXTUREACCESS_STREAMING,
                                             Window_WIDTH, Window_HEIGHT);
    SDL_Rect rect;
    rect.w = Window_WIDTH;
    rect.h = Window_HEIGHT;
    rect.x = 0;
    rect.y = 0;
    SDL_Event event;
    SDL_AudioDeviceID dev;
    SDL_AudioSpec want, have;
    SDL_zero(want);
    SDL_zero(have);
    want.freq = dst_samplerate;
    want.channels = channels;
    want.format = AUDIO_S16SYS;
    dev = SDL_OpenAudioDevice(NULL, 0, &want, &have, 0);
    SDL_PauseAudioDevice(dev, 0);
    bool running = true;
    bool playing = true;
    //start playing
    while (running){
        uint32_t start = SDL_GetTicks();
        while (playing){
            while (SDL_PollEvent(&event)){
                if (event.type == SDL_QUIT){
                    running = false;
                    playing = false;
                    break;
                }
                if (event.type == SDL_KEYDOWN && event.key.keysym.sym == SDLK_ESCAPE){
                    running = false;
                    playing = false;
                    break;
                }
            }
            ret = av_read_frame(inputFmtCtx, packet);
            if (ret < 0){
                running = false;
                break;
            }
            AVStream *stream = inputFmtCtx->streams[packet->stream_index];
            double timebase = av_q2d(stream->time_base);
            if (stream->codecpar->codec_type == video_stream_index){
                ret = avcodec_send_packet(videoDecodeCtx, packet);
                while (ret >= 0){
                    ret = avcodec_receive_frame(videoDecodeCtx, frame);
                    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
                    else if (ret >= 0){
                        uint32_t video_time = (double)frame->best_effort_timestamp * timebase * 1000;
                        while (true){
                            uint32_t elapsed = SDL_GetTicks() - start;
                            if (elapsed >= video_time) break;
                        }
                        sws_scale(scaler, 
                                  frame->data, 
                                  frame->linesize,
                                  0, 
                                  frame->height, 
                                  dstframe->data, 
                                  dstframe->linesize);
                        SDL_UpdateTexture(texture, 
                                          &rect, 
                                          dstframe->data[0], 
                                          dstframe->linesize[0]);
                        SDL_RenderClear(renderer);
                        SDL_RenderCopy(renderer, 
                                       texture, 
                                       NULL, 
                                       &rect);
                        SDL_RenderPresent(renderer);
                    }
                }
            }
            if (stream->codecpar->codec_type == audio_stream_index){
                ret = avcodec_send_packet(audioDecodeCtx, packet);
                while (ret >= 0){
                    ret = avcodec_receive_frame(audioDecodeCtx, frame);
                    if (ret >= 0){
                        int dst_samples = frame->channels * av_rescale_rnd(swr_get_delay(resampler, frame->sample_rate) + frame->nb_samples,
                                                                           dst_samplerate, 
                                                                           frame->sample_rate,
                                                                           AV_ROUND_UP);
                        uint8_t *audiobuf = NULL;
                        ret = av_samples_alloc(&audiobuf, 
                                               NULL, 
                                               1, 
                                               dst_samples,
                                               dst_smp_fmt, 
                                               1);
                        dst_samples = frame->channels * swr_convert(resampler, 
                                                                    &audiobuf, 
                                                                    dst_samples,
                                                                    (const uint8_t**) frame->data, 
                                                                    frame->nb_samples);
                        ret = av_samples_fill_arrays(audioframe->data, 
                                                     audioframe->linesize, 
                                                     audiobuf,
                                                     1, 
                                                     dst_samples, 
                                                     dst_smp_fmt, 
                                                     1);
                        SDL_QueueAudio(dev, audioframe->data[0], audioframe->linesize[0]);    
                    }
                }
                av_frame_unref(frame);
            }
        }       
    }
    av_packet_free(&packet);
    av_frame_free(&frame);
    //av_frame_free(&rgbframe);
    av_frame_free(&dstframe);
    av_frame_free(&audioframe);
    av_freep(&buf);
    //sws_freeContext(torgb);
    sws_freeContext(scaler);
    swr_free(&resampler);
    avformat_free_context(inputFmtCtx);
    avcodec_free_context(&videoDecodeCtx);
    avcodec_free_context(&audioDecodeCtx);
    SDL_CloseAudioDevice(dev);
    SDL_DestroyTexture(texture);
    SDL_DestroyRenderer(renderer);
    SDL_DestroyWindow(window);
    SDL_Quit();
    return 0;
}
You get articles that match your needs
You can efficiently read back useful information
You can use dark theme
What you can do by signing up