30#define MAX_SUPPORTED_WIDTH 1950
31#define MAX_SUPPORTED_HEIGHT 1100
34#include "libavutil/hwcontext_vaapi.h"
36typedef struct VAAPIDecodeContext {
38 VAEntrypoint va_entrypoint;
40 VAContextID va_context;
42#if FF_API_STRUCT_VAAPI_CONTEXT
45 struct vaapi_context *old_context;
46 AVBufferRef *device_ref;
50 AVHWDeviceContext *device;
51 AVVAAPIDeviceContext *hwctx;
53 AVHWFramesContext *frames;
54 AVVAAPIFramesContext *hwfc;
56 enum AVPixelFormat surface_format;
71FFmpegReader::FFmpegReader(
const std::string &
path,
bool inspect_reader)
72 : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
73 path(
path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
74 seek_audio_frame_found(0), seek_video_frame_found(0),is_duration_known(false), largest_frame_processed(0),
76 video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
77 pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
85 pts_offset_seconds = NO_PTS_OFFSET;
86 video_pts_seconds = NO_PTS_OFFSET;
87 audio_pts_seconds = NO_PTS_OFFSET;
116 if (abs(diff) <= amount)
127static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx,
const enum AVPixelFormat *pix_fmts)
129 const enum AVPixelFormat *p;
131 for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
133#if defined(__linux__)
135 case AV_PIX_FMT_VAAPI:
140 case AV_PIX_FMT_VDPAU:
148 case AV_PIX_FMT_DXVA2_VLD:
153 case AV_PIX_FMT_D3D11:
159#if defined(__APPLE__)
161 case AV_PIX_FMT_VIDEOTOOLBOX:
168 case AV_PIX_FMT_CUDA:
184 return AV_PIX_FMT_NONE;
187int FFmpegReader::IsHardwareDecodeSupported(
int codecid)
191 case AV_CODEC_ID_H264:
192 case AV_CODEC_ID_MPEG2VIDEO:
193 case AV_CODEC_ID_VC1:
194 case AV_CODEC_ID_WMV1:
195 case AV_CODEC_ID_WMV2:
196 case AV_CODEC_ID_WMV3:
211 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
221 if (avformat_open_input(&pFormatCtx,
path.c_str(), NULL, NULL) != 0)
225 if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
232 packet_status.
reset(
true);
235 for (
unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
237 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
244 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
251 if (videoStream == -1 && audioStream == -1)
255 if (videoStream != -1) {
260 pStream = pFormatCtx->streams[videoStream];
266 const AVCodec *pCodec = avcodec_find_decoder(codecId);
267 AVDictionary *
opts = NULL;
268 int retry_decode_open = 2;
273 if (
hw_de_on && (retry_decode_open==2)) {
275 hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
278 retry_decode_open = 0;
283 if (pCodec == NULL) {
284 throw InvalidCodec(
"A valid video codec could not be found for this file.",
path);
288 av_dict_set(&
opts,
"strict",
"experimental", 0);
292 int i_decoder_hw = 0;
294 char *adapter_ptr = NULL;
297 fprintf(stderr,
"Hardware decoding device number: %d\n", adapter_num);
300 pCodecCtx->get_format = get_hw_dec_format;
302 if (adapter_num < 3 && adapter_num >=0) {
303#if defined(__linux__)
304 snprintf(adapter,
sizeof(adapter),
"/dev/dri/renderD%d", adapter_num+128);
305 adapter_ptr = adapter;
307 switch (i_decoder_hw) {
309 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
312 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
315 hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
318 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
321 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
328 switch (i_decoder_hw) {
330 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
333 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
336 hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
339 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
342 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
345#elif defined(__APPLE__)
348 switch (i_decoder_hw) {
350 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
353 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
356 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
366#if defined(__linux__)
367 if( adapter_ptr != NULL && access( adapter_ptr, W_OK ) == 0 ) {
369 if( adapter_ptr != NULL ) {
370#elif defined(__APPLE__)
371 if( adapter_ptr != NULL ) {
380 hw_device_ctx = NULL;
382 if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
383 if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
425 pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
429 int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &
opts);
430 if (avcodec_return < 0) {
431 std::stringstream avcodec_error_msg;
432 avcodec_error_msg <<
"A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
438 AVHWFramesConstraints *constraints = NULL;
439 void *hwconfig = NULL;
440 hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);
444 ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
445 constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx,hwconfig);
448 if (pCodecCtx->coded_width < constraints->min_width ||
449 pCodecCtx->coded_height < constraints->min_height ||
450 pCodecCtx->coded_width > constraints->max_width ||
451 pCodecCtx->coded_height > constraints->max_height) {
454 retry_decode_open = 1;
457 av_buffer_unref(&hw_device_ctx);
458 hw_device_ctx = NULL;
463 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Min width :", constraints->min_width,
"Min Height :", constraints->min_height,
"MaxWidth :", constraints->max_width,
"MaxHeight :", constraints->max_height,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
464 retry_decode_open = 0;
466 av_hwframe_constraints_free(&constraints);
479 if (pCodecCtx->coded_width < 0 ||
480 pCodecCtx->coded_height < 0 ||
481 pCodecCtx->coded_width > max_w ||
482 pCodecCtx->coded_height > max_h ) {
483 ZmqLogger::Instance()->
AppendDebugMethod(
"DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
485 retry_decode_open = 1;
488 av_buffer_unref(&hw_device_ctx);
489 hw_device_ctx = NULL;
493 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
494 retry_decode_open = 0;
502 retry_decode_open = 0;
504 }
while (retry_decode_open);
513 if (audioStream != -1) {
518 aStream = pFormatCtx->streams[audioStream];
524 const AVCodec *aCodec = avcodec_find_decoder(codecId);
530 if (aCodec == NULL) {
531 throw InvalidCodec(
"A valid audio codec could not be found for this file.",
path);
535 AVDictionary *
opts = NULL;
536 av_dict_set(&
opts,
"strict",
"experimental", 0);
539 if (avcodec_open2(aCodecCtx, aCodec, &
opts) < 0)
540 throw InvalidCodec(
"An audio codec was found, but could not be opened.",
path);
550 AVDictionaryEntry *tag = NULL;
551 while ((tag = av_dict_get(pFormatCtx->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
552 QString str_key = tag->key;
553 QString str_value = tag->value;
554 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
558 previous_packet_location.
frame = -1;
590 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
596 AVPacket *recent_packet = packet;
601 int max_attempts = 128;
606 "attempts", attempts);
618 RemoveAVPacket(recent_packet);
623 if(avcodec_is_open(pCodecCtx)) {
624 avcodec_flush_buffers(pCodecCtx);
630 av_buffer_unref(&hw_device_ctx);
631 hw_device_ctx = NULL;
639 if(avcodec_is_open(aCodecCtx)) {
640 avcodec_flush_buffers(aCodecCtx);
647 working_cache.
Clear();
650 avformat_close_input(&pFormatCtx);
651 av_freep(&pFormatCtx);
656 largest_frame_processed = 0;
657 seek_audio_frame_found = 0;
658 seek_video_frame_found = 0;
659 current_video_frame = 0;
660 last_video_frame.reset();
664bool FFmpegReader::HasAlbumArt() {
668 return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
669 && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
672void FFmpegReader::UpdateAudioInfo() {
689 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
720 if (aStream->duration > 0 && aStream->duration >
info.
duration) {
723 }
else if (pFormatCtx->duration > 0 &&
info.
duration <= 0.0f) {
725 info.
duration = float(pFormatCtx->duration) / AV_TIME_BASE;
768 AVDictionaryEntry *tag = NULL;
769 while ((tag = av_dict_get(aStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
770 QString str_key = tag->key;
771 QString str_value = tag->value;
772 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
776void FFmpegReader::UpdateVideoInfo() {
784 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
791 AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
803 if (pStream->sample_aspect_ratio.num != 0) {
826 if (!check_interlace) {
827 check_interlace =
true;
829 switch(field_order) {
830 case AV_FIELD_PROGRESSIVE:
843 case AV_FIELD_UNKNOWN:
845 check_interlace =
false;
860 if (
info.
duration <= 0.0f && pFormatCtx->duration >= 0) {
862 info.
duration = float(pFormatCtx->duration) / AV_TIME_BASE;
872 if (
info.
duration <= 0.0f && pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
890 is_duration_known =
false;
893 is_duration_known =
true;
903 AVDictionaryEntry *tag = NULL;
904 while ((tag = av_dict_get(pStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
905 QString str_key = tag->key;
906 QString str_value = tag->value;
907 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
912 return this->is_duration_known;
918 throw ReaderClosed(
"The FFmpegReader is closed. Call Open() before calling this method.",
path);
921 if (requested_frame < 1)
927 throw InvalidFile(
"Could not detect the duration of the video or audio stream.",
path);
943 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
957 int64_t diff = requested_frame - last_frame;
958 if (diff >= 1 && diff <= 20) {
960 frame = ReadStream(requested_frame);
965 Seek(requested_frame);
974 frame = ReadStream(requested_frame);
982std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
984 bool check_seek =
false;
985 int packet_error = -1;
995 CheckWorkingFrames(requested_frame);
1000 if (is_cache_found) {
1004 if (!hold_packet || !packet) {
1006 packet_error = GetNextPacket();
1007 if (packet_error < 0 && !packet) {
1018 check_seek = CheckSeek(
false);
1030 if ((
info.
has_video && packet && packet->stream_index == videoStream) ||
1034 ProcessVideoPacket(requested_frame);
1037 if ((
info.
has_audio && packet && packet->stream_index == audioStream) ||
1041 ProcessAudioPacket(requested_frame);
1046 if ((!
info.
has_video && packet && packet->stream_index == videoStream) ||
1047 (!
info.
has_audio && packet && packet->stream_index == audioStream)) {
1049 if (packet->stream_index == videoStream) {
1051 }
else if (packet->stream_index == audioStream) {
1057 RemoveAVPacket(packet);
1067 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ReadStream (force EOF)",
"packets_read", packet_status.
packets_read(),
"packets_decoded", packet_status.
packets_decoded(),
"packets_eof", packet_status.
packets_eof,
"video_eof", packet_status.
video_eof,
"audio_eof", packet_status.
audio_eof,
"end_of_file", packet_status.
end_of_file);
1084 "largest_frame_processed", largest_frame_processed,
1085 "Working Cache Count", working_cache.
Count());
1094 CheckWorkingFrames(requested_frame);
1110 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1113 if (!frame->has_image_data) {
1118 frame->AddAudioSilence(samples_in_frame);
1123 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1125 f->AddAudioSilence(samples_in_frame);
1133int FFmpegReader::GetNextPacket() {
1134 int found_packet = 0;
1135 AVPacket *next_packet;
1136 next_packet =
new AVPacket();
1137 found_packet = av_read_frame(pFormatCtx, next_packet);
1141 RemoveAVPacket(packet);
1144 if (found_packet >= 0) {
1146 packet = next_packet;
1149 if (packet->stream_index == videoStream) {
1151 }
else if (packet->stream_index == audioStream) {
1160 return found_packet;
1164bool FFmpegReader::GetAVFrame() {
1165 int frameFinished = 0;
1171 int send_packet_err = 0;
1172 int64_t send_packet_pts = 0;
1173 if ((packet && packet->stream_index == videoStream) || !packet) {
1174 send_packet_err = avcodec_send_packet(pCodecCtx, packet);
1176 if (packet && send_packet_err >= 0) {
1177 send_packet_pts = GetPacketPTS();
1178 hold_packet =
false;
1188 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1189 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) +
"])",
"send_packet_err", send_packet_err,
"send_packet_pts", send_packet_pts);
1190 if (send_packet_err == AVERROR(EAGAIN)) {
1192 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()",
"send_packet_pts", send_packet_pts);
1194 if (send_packet_err == AVERROR(EINVAL)) {
1195 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush",
"send_packet_pts", send_packet_pts);
1197 if (send_packet_err == AVERROR(ENOMEM)) {
1198 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors",
"send_packet_pts", send_packet_pts);
1205 int receive_frame_err = 0;
1206 AVFrame *next_frame2;
1214 next_frame2 = next_frame;
1217 while (receive_frame_err >= 0) {
1218 receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
1220 if (receive_frame_err != 0) {
1221 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [\" + av_err2string(receive_frame_err) + \"])",
"receive_frame_err", receive_frame_err,
"send_packet_pts", send_packet_pts);
1223 if (receive_frame_err == AVERROR_EOF) {
1225 "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
"send_packet_pts", send_packet_pts);
1226 avcodec_flush_buffers(pCodecCtx);
1229 if (receive_frame_err == AVERROR(EINVAL)) {
1231 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
"send_packet_pts", send_packet_pts);
1232 avcodec_flush_buffers(pCodecCtx);
1234 if (receive_frame_err == AVERROR(EAGAIN)) {
1236 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
"send_packet_pts", send_packet_pts);
1238 if (receive_frame_err == AVERROR_INPUT_CHANGED) {
1240 "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
"send_packet_pts", send_packet_pts);
1251 if (next_frame2->format == hw_de_av_pix_fmt) {
1252 next_frame->format = AV_PIX_FMT_YUV420P;
1253 if ((err = av_hwframe_transfer_data(next_frame,next_frame2,0)) < 0) {
1256 if ((err = av_frame_copy_props(next_frame,next_frame2)) < 0) {
1264 next_frame = next_frame2;
1272 av_image_alloc(pFrame->data, pFrame->linesize,
info.
width,
info.
height, (AVPixelFormat)(pStream->codecpar->format), 1);
1273 av_image_copy(pFrame->data, pFrame->linesize, (
const uint8_t**)next_frame->data, next_frame->linesize,
1280 if (next_frame->pts != AV_NOPTS_VALUE) {
1283 video_pts = next_frame->pts;
1284 }
else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
1286 video_pts = next_frame->pkt_dts;
1290 "FFmpegReader::GetAVFrame (Successful frame received)",
"video_pts", video_pts,
"send_packet_pts", send_packet_pts);
1301 avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
1307 if (frameFinished) {
1311 av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt,
info.
width,
1320 return frameFinished;
1324bool FFmpegReader::CheckSeek(
bool is_video) {
1329 if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
1337 int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);
1340 if (max_seeked_frame >= seeking_frame) {
1343 "is_video_seek", is_video_seek,
1344 "max_seeked_frame", max_seeked_frame,
1345 "seeking_frame", seeking_frame,
1346 "seeking_pts", seeking_pts,
1347 "seek_video_frame_found", seek_video_frame_found,
1348 "seek_audio_frame_found", seek_audio_frame_found);
1351 Seek(seeking_frame - (10 * seek_count * seek_count));
1355 "is_video_seek", is_video_seek,
1356 "packet->pts", GetPacketPTS(),
1357 "seeking_pts", seeking_pts,
1358 "seeking_frame", seeking_frame,
1359 "seek_video_frame_found", seek_video_frame_found,
1360 "seek_audio_frame_found", seek_audio_frame_found);
1374void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
1377 int frame_finished = GetAVFrame();
1380 if (!frame_finished) {
1383 RemoveAVFrame(pFrame);
1389 int64_t current_frame = ConvertVideoPTStoFrame(video_pts);
1392 if (!seek_video_frame_found && is_seeking)
1393 seek_video_frame_found = current_frame;
1399 working_cache.
Add(CreateFrame(requested_frame));
1411 AVFrame *pFrameRGB =
nullptr;
1412 uint8_t *buffer =
nullptr;
1416 if (pFrameRGB ==
nullptr)
1438 max_width = std::max(
float(max_width), max_width * max_scale_x);
1439 max_height = std::max(
float(max_height), max_height * max_scale_y);
1445 QSize width_size(max_width * max_scale_x,
1448 max_height * max_scale_y);
1450 if (width_size.width() >= max_width && width_size.height() >= max_height) {
1451 max_width = std::max(max_width, width_size.width());
1452 max_height = std::max(max_height, width_size.height());
1454 max_width = std::max(max_width, height_size.width());
1455 max_height = std::max(max_height, height_size.height());
1462 float preview_ratio = 1.0;
1469 max_width =
info.
width * max_scale_x * preview_ratio;
1470 max_height =
info.
height * max_scale_y * preview_ratio;
1475 int original_height = height;
1476 if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
1478 float ratio = float(width) / float(height);
1479 int possible_width = round(max_height * ratio);
1480 int possible_height = round(max_width / ratio);
1482 if (possible_width <= max_width) {
1484 width = possible_width;
1485 height = max_height;
1489 height = possible_height;
1494 const int bytes_per_pixel = 4;
1495 int buffer_size = (width * height * bytes_per_pixel) + 128;
1496 buffer =
new unsigned char[buffer_size]();
1501 int scale_mode = SWS_FAST_BILINEAR;
1503 scale_mode = SWS_BICUBIC;
1509 sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
1510 original_height, pFrameRGB->data, pFrameRGB->linesize);
1513 std::shared_ptr<Frame> f = CreateFrame(current_frame);
1518 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
1521 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
1525 working_cache.
Add(f);
1528 last_video_frame = f;
1534 RemoveAVFrame(pFrame);
1535 sws_freeContext(img_convert_ctx);
1541 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ProcessVideoPacket (After)",
"requested_frame", requested_frame,
"current_frame", current_frame,
"f->number", f->number,
"video_pts_seconds", video_pts_seconds);
1545void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
1548 if (packet && packet->pts != AV_NOPTS_VALUE) {
1550 location = GetAudioPTSLocation(packet->pts);
1553 if (!seek_audio_frame_found && is_seeking)
1554 seek_audio_frame_found = location.
frame;
1561 working_cache.
Add(CreateFrame(requested_frame));
1565 "requested_frame", requested_frame,
1566 "target_frame", location.
frame,
1570 int frame_finished = 0;
1574 int packet_samples = 0;
1578 int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
1579 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1583 int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
1584 if (receive_frame_err >= 0) {
1587 if (receive_frame_err == AVERROR_EOF) {
1591 if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
1593 avcodec_flush_buffers(aCodecCtx);
1595 if (receive_frame_err != 0) {
1600 int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
1603 if (frame_finished) {
1609 audio_pts = audio_frame->pts;
1612 location = GetAudioPTSLocation(audio_pts);
1615 int plane_size = -1;
1621 data_size = av_samples_get_buffer_size(&plane_size, nb_channels,
1625 packet_samples = audio_frame->nb_samples * nb_channels;
1634 int pts_remaining_samples = packet_samples /
info.
channels;
1637 if (pts_remaining_samples == 0) {
1639 "packet_samples", packet_samples,
1641 "pts_remaining_samples", pts_remaining_samples);
1645 while (pts_remaining_samples) {
1650 int samples = samples_per_frame - previous_packet_location.
sample_start;
1651 if (samples > pts_remaining_samples)
1652 samples = pts_remaining_samples;
1655 pts_remaining_samples -= samples;
1657 if (pts_remaining_samples > 0) {
1659 previous_packet_location.
frame++;
1668 "packet_samples", packet_samples,
1676 audio_converted->nb_samples = audio_frame->nb_samples;
1677 av_samples_alloc(audio_converted->data, audio_converted->linesize,
info.
channels, audio_frame->nb_samples, AV_SAMPLE_FMT_FLTP, 0);
1693 av_opt_set_int(avr,
"out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
1700 audio_converted->data,
1701 audio_converted->linesize[0],
1702 audio_converted->nb_samples,
1704 audio_frame->linesize[0],
1705 audio_frame->nb_samples);
1712 int64_t starting_frame_number = -1;
1713 for (
int channel_filter = 0; channel_filter <
info.
channels; channel_filter++) {
1715 starting_frame_number = location.
frame;
1716 int channel_buffer_size = nb_samples;
1717 auto *channel_buffer = (
float *) (audio_converted->data[channel_filter]);
1721 int remaining_samples = channel_buffer_size;
1722 while (remaining_samples > 0) {
1727 int samples = std::fmin(samples_per_frame - start, remaining_samples);
1730 std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);
1733 f->AddAudio(
true, channel_filter, start, channel_buffer, samples, 1.0f);
1737 "frame", starting_frame_number,
1740 "channel", channel_filter,
1741 "samples_per_frame", samples_per_frame);
1744 working_cache.
Add(f);
1747 remaining_samples -= samples;
1750 if (remaining_samples > 0)
1751 channel_buffer += samples;
1754 starting_frame_number++;
1762 av_free(audio_converted->data[0]);
1771 "requested_frame", requested_frame,
1772 "starting_frame", location.
frame,
1773 "end_frame", starting_frame_number - 1,
1774 "audio_pts_seconds", audio_pts_seconds);
1780void FFmpegReader::Seek(int64_t requested_frame) {
1782 if (requested_frame < 1)
1783 requested_frame = 1;
1786 if (requested_frame > largest_frame_processed && packet_status.
end_of_file) {
1793 "requested_frame", requested_frame,
1794 "seek_count", seek_count,
1795 "last_frame", last_frame);
1798 working_cache.
Clear();
1802 video_pts_seconds = NO_PTS_OFFSET;
1804 audio_pts_seconds = NO_PTS_OFFSET;
1805 hold_packet =
false;
1807 current_video_frame = 0;
1808 largest_frame_processed = 0;
1813 packet_status.
reset(
false);
1819 int buffer_amount = std::max(max_concurrent_frames, 8);
1820 if (requested_frame - buffer_amount < 20) {
1834 if (seek_count == 1) {
1837 seeking_pts = ConvertFrameToVideoPTS(1);
1839 seek_audio_frame_found = 0;
1840 seek_video_frame_found = 0;
1844 bool seek_worked =
false;
1845 int64_t seek_target = 0;
1849 seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
1851 fprintf(stderr,
"%s: error while seeking video stream\n", pFormatCtx->AV_FILENAME);
1854 is_video_seek =
true;
1861 seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
1863 fprintf(stderr,
"%s: error while seeking audio stream\n", pFormatCtx->AV_FILENAME);
1866 is_video_seek =
false;
1875 avcodec_flush_buffers(aCodecCtx);
1879 avcodec_flush_buffers(pCodecCtx);
1882 previous_packet_location.
frame = -1;
1887 if (seek_count == 1) {
1889 seeking_pts = seek_target;
1890 seeking_frame = requested_frame;
1892 seek_audio_frame_found = 0;
1893 seek_video_frame_found = 0;
1921int64_t FFmpegReader::GetPacketPTS() {
1923 int64_t current_pts = packet->pts;
1924 if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
1925 current_pts = packet->dts;
1931 return AV_NOPTS_VALUE;
1936void FFmpegReader::UpdatePTSOffset() {
1937 if (pts_offset_seconds != NO_PTS_OFFSET) {
1941 pts_offset_seconds = 0.0;
1942 double video_pts_offset_seconds = 0.0;
1943 double audio_pts_offset_seconds = 0.0;
1945 bool has_video_pts =
false;
1948 has_video_pts =
true;
1950 bool has_audio_pts =
false;
1953 has_audio_pts =
true;
1957 while (!has_video_pts || !has_audio_pts) {
1959 if (GetNextPacket() < 0)
1964 int64_t pts = GetPacketPTS();
1967 if (!has_video_pts && packet->stream_index == videoStream) {
1973 if (std::abs(video_pts_offset_seconds) <= 10.0) {
1974 has_video_pts =
true;
1977 else if (!has_audio_pts && packet->stream_index == audioStream) {
1983 if (std::abs(audio_pts_offset_seconds) <= 10.0) {
1984 has_audio_pts =
true;
1990 if (has_video_pts && has_audio_pts) {
2002 pts_offset_seconds = std::max(video_pts_offset_seconds, audio_pts_offset_seconds);
2007int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
2009 int64_t previous_video_frame = current_video_frame;
2018 if (current_video_frame == 0)
2019 current_video_frame = frame;
2023 if (frame == previous_video_frame) {
2028 current_video_frame++;
2037int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
2039 double seconds = (double(frame_number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2049int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
2051 double seconds = (double(frame_number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2061AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
2069 int64_t whole_frame = int64_t(frame);
2072 double sample_start_percentage = frame - double(whole_frame);
2078 int sample_start = round(
double(samples_per_frame) * sample_start_percentage);
2081 if (whole_frame < 1)
2083 if (sample_start < 0)
2090 if (previous_packet_location.
frame != -1) {
2091 if (location.
is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
2092 int64_t orig_frame = location.
frame;
2097 location.
frame = previous_packet_location.
frame;
2100 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
"Source Frame", orig_frame,
"Source Audio Sample", orig_start,
"Target Frame", location.
frame,
"Target Audio Sample", location.
sample_start,
"pts", pts);
2109 previous_packet_location = location;
2116std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
2118 std::shared_ptr<Frame> output = working_cache.
GetFrame(requested_frame);
2122 output = working_cache.
GetFrame(requested_frame);
2123 if(output)
return output;
2131 working_cache.
Add(output);
2134 if (requested_frame > largest_frame_processed)
2135 largest_frame_processed = requested_frame;
2142bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
2145 bool seek_trash =
false;
2146 int64_t max_seeked_frame = seek_audio_frame_found;
2147 if (seek_video_frame_found > max_seeked_frame) {
2148 max_seeked_frame = seek_video_frame_found;
2150 if ((
info.
has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
2151 (
info.
has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
2159void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
2162 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
2165 std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.
GetFrames();
2166 std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;
2169 for(working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
2172 std::shared_ptr<Frame> f = *working_itr;
2175 if (!f || f->number > requested_frame) {
2181 double frame_pts_seconds = (double(f->number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2182 double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);
2185 bool is_video_ready =
false;
2186 bool is_audio_ready =
false;
2187 double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
2188 if ((frame_pts_seconds <= video_pts_seconds)
2189 || (recent_pts_diff > 1.5)
2193 is_video_ready =
true;
2195 "frame_number", f->number,
2196 "frame_pts_seconds", frame_pts_seconds,
2197 "video_pts_seconds", video_pts_seconds,
2198 "recent_pts_diff", recent_pts_diff);
2202 for (int64_t previous_frame = requested_frame - 1; previous_frame > 0; previous_frame--) {
2204 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2206 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2211 if (last_video_frame && !f->has_image_data) {
2213 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2214 }
else if (!f->has_image_data) {
2215 f->AddColor(
"#000000");
2220 double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
2221 if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
2222 || (recent_pts_diff > 1.5)
2227 is_audio_ready =
true;
2229 "frame_number", f->number,
2230 "frame_pts_seconds", frame_pts_seconds,
2231 "audio_pts_seconds", audio_pts_seconds,
2232 "audio_pts_diff", audio_pts_diff,
2233 "recent_pts_diff", recent_pts_diff);
2235 bool is_seek_trash = IsPartialFrame(f->number);
2243 "frame_number", f->number,
2244 "is_video_ready", is_video_ready,
2245 "is_audio_ready", is_audio_ready,
2251 if ((!packet_status.
end_of_file && is_video_ready && is_audio_ready) || packet_status.
end_of_file || is_seek_trash) {
2254 "requested_frame", requested_frame,
2255 "f->number", f->number,
2256 "is_seek_trash", is_seek_trash,
2257 "Working Cache Count", working_cache.
Count(),
2261 if (!is_seek_trash) {
2266 working_cache.
Remove(f->number);
2269 last_frame = f->number;
2272 working_cache.
Remove(f->number);
2279 working_frames.clear();
2280 working_frames.shrink_to_fit();
2284void FFmpegReader::CheckFPS() {
2292 int frames_per_second[3] = {0,0,0};
2293 int max_fps_index =
sizeof(frames_per_second) /
sizeof(frames_per_second[0]);
2296 int all_frames_detected = 0;
2297 int starting_frames_detected = 0;
2302 if (GetNextPacket() < 0)
2307 if (packet->stream_index == videoStream) {
2310 fps_index = int(video_seconds);
2313 if (fps_index >= 0 && fps_index < max_fps_index) {
2315 starting_frames_detected++;
2316 frames_per_second[fps_index]++;
2320 all_frames_detected++;
2325 float avg_fps = 30.0;
2326 if (starting_frames_detected > 0 && fps_index > 0) {
2327 avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
2331 if (avg_fps < 8.0) {
2340 if (all_frames_detected > 0) {
2354void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
2358 av_freep(&remove_frame->data[0]);
2366void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
2371 delete remove_packet;
2386 root[
"type"] =
"FFmpegReader";
2387 root[
"path"] =
path;
2402 catch (
const std::exception& e) {
2404 throw InvalidJSON(
"JSON is invalid (missing keys or invalid data types)");
2415 if (!root[
"path"].isNull())
2416 path = root[
"path"].asString();
Header file for all Exception classes.
AVPixelFormat hw_de_av_pix_fmt_global
AVHWDeviceType hw_de_av_device_type_global
Header file for FFmpegReader class.
Header file for FFmpegUtilities.
#define AV_FREE_CONTEXT(av_context)
#define AV_FREE_FRAME(av_frame)
#define SWR_CONVERT(ctx, out, linesize, out_count, in, linesize2, in_count)
#define AV_GET_CODEC_TYPE(av_stream)
#define AV_GET_CODEC_PIXEL_FORMAT(av_stream, av_context)
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec)
#define AV_FIND_DECODER_CODEC_ID(av_stream)
#define AV_ALLOCATE_FRAME()
#define AV_COPY_PICTURE_DATA(av_frame, buffer, pix_fmt, width, height)
#define AV_FREE_PACKET(av_packet)
#define AVCODEC_REGISTER_ALL
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context)
#define AV_GET_SAMPLE_FORMAT(av_stream, av_context)
#define AV_RESET_FRAME(av_frame)
#define FF_NUM_PROCESSORS
#define OPEN_MP_NUM_PROCESSORS
Header file for Timeline class.
Header file for ZeroMQ-based Logger class.
void SetMaxBytesFromInfo(int64_t number_of_frames, int width, int height, int sample_rate, int channels)
Set maximum bytes to a different amount based on a ReaderInfo struct.
int64_t Count()
Count the frames in the queue.
void Add(std::shared_ptr< openshot::Frame > frame)
Add a Frame to the cache.
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number)
Get a frame from the cache.
std::vector< std::shared_ptr< openshot::Frame > > GetFrames()
Get an array of all Frames.
void Remove(int64_t frame_number)
Remove a specific frame.
void Clear()
Clear the cache of all frames.
This class represents a clip (used to arrange readers on the timeline)
openshot::Keyframe scale_x
Curve representing the horizontal scaling in percent (0 to 1)
openshot::TimelineBase * ParentTimeline() override
Get the associated Timeline pointer (if any)
openshot::Keyframe scale_y
Curve representing the vertical scaling in percent (0 to 1)
openshot::ScaleType scale
The scale determines how a clip should be resized to fit its parent.
double Y
The Y value of the coordinate (usually representing the value of the property being animated)
void Open() override
Open File - which is called by the constructor automatically.
Json::Value JsonValue() const override
Generate Json::Value for this object.
bool GetIsDurationKnown()
Return true if frame can be read with GetFrame()
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
CacheMemory final_cache
Final cache object used to hold final frames.
virtual ~FFmpegReader()
Destructor.
std::string Json() const override
Generate JSON string of this object.
std::shared_ptr< openshot::Frame > GetFrame(int64_t requested_frame) override
void Close() override
Close File.
void SetJson(const std::string value) override
Load JSON string into this object.
This class represents a fraction.
int num
Numerator for the fraction.
float ToFloat()
Return this fraction as a float (i.e. 1/2 = 0.5)
double ToDouble() const
Return this fraction as a double (i.e. 1/2 = 0.5)
int den
Denominator for the fraction.
int GetSamplesPerFrame(openshot::Fraction fps, int sample_rate, int channels)
Calculate the # of samples per video frame (for the current frame number)
Exception when no valid codec is found for a file.
Exception for files that can not be found or opened.
Exception for invalid JSON.
Point GetMaxPoint() const
Get max point (by Y coordinate)
Exception when no streams are found in the file.
Exception when memory could not be allocated.
Coordinate co
This is the primary coordinate.
openshot::ReaderInfo info
Information about the current media file.
virtual void SetJsonValue(const Json::Value root)=0
Load Json::Value into this object.
virtual Json::Value JsonValue() const =0
Generate Json::Value for this object.
std::recursive_mutex getFrameMutex
Mutex for multiple threads.
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
Exception when a reader is closed, and a frame is requested.
int DE_LIMIT_WIDTH_MAX
Maximum columns that hardware decode can handle.
int HW_DE_DEVICE_SET
Which GPU to use to decode (0 is the first)
int DE_LIMIT_HEIGHT_MAX
Maximum rows that hardware decode can handle.
static Settings * Instance()
Create or get an instance of this settings singleton (invoke the class with this method)
int HARDWARE_DECODER
Use video codec for faster video decoding (if supported)
int preview_height
Optional preview height of timeline image. If your preview window is smaller than the timeline,...
int preview_width
Optional preview width of timeline image. If your preview window is smaller than the timeline,...
This class represents a timeline.
void AppendDebugMethod(std::string method_name, std::string arg1_name="", float arg1_value=-1.0, std::string arg2_name="", float arg2_value=-1.0, std::string arg3_name="", float arg3_value=-1.0, std::string arg4_name="", float arg4_value=-1.0, std::string arg5_name="", float arg5_value=-1.0, std::string arg6_name="", float arg6_value=-1.0)
Append debug information.
static ZmqLogger * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
This namespace is the default namespace for all code in the openshot library.
@ SCALE_FIT
Scale the clip until either height or width fills the canvas (with no cropping)
@ SCALE_STRETCH
Scale the clip until both height and width fill the canvas (distort to fit)
@ SCALE_CROP
Scale the clip until both height and width fill the canvas (cropping the overlap)
ChannelLayout
This enumeration determines the audio channel layout (such as stereo, mono, 5 point surround,...
const Json::Value stringToJson(const std::string value)
This struct holds the associated video frame and starting sample # for an audio packet.
bool is_near(AudioLocation location, int samples_per_frame, int64_t amount)
int64_t packets_decoded()
int audio_bit_rate
The bit rate of the audio stream (in bytes)
int video_bit_rate
The bit rate of the video stream (in bytes)
bool has_single_image
Determines if this file only contains a single image.
float duration
Length of time (in seconds)
openshot::Fraction audio_timebase
The audio timebase determines how long each audio packet should be played.
int width
The width of the video (in pixels)
int channels
The number of audio channels used in the audio stream.
openshot::Fraction fps
Frames per second, as a fraction (i.e. 24/1 = 24 fps)
openshot::Fraction display_ratio
The ratio of width to height of the video stream (i.e. 640x480 has a ratio of 4/3)
int height
The height of the video (in pixels)
int pixel_format
The pixel format (i.e. YUV420P, RGB24, etc...)
int64_t video_length
The number of frames in the video stream.
std::string acodec
The name of the audio codec used to encode / decode the video stream.
std::map< std::string, std::string > metadata
An optional map/dictionary of metadata for this reader.
std::string vcodec
The name of the video codec used to encode / decode the video stream.
openshot::Fraction pixel_ratio
The pixel ratio of the video stream as a fraction (i.e. some pixels are not square)
openshot::ChannelLayout channel_layout
The channel layout (mono, stereo, 5 point surround, etc...)
bool has_video
Determines if this file has a video stream.
bool has_audio
Determines if this file has an audio stream.
openshot::Fraction video_timebase
The video timebase determines how long each frame stays on the screen.
int video_stream_index
The index of the video stream.
int sample_rate
The number of audio samples per second (44100 is a common sample rate)
int audio_stream_index
The index of the audio stream.
int64_t file_size
Size of file (in bytes)