OpenShot Library | libopenshot 0.5.0
Loading...
Searching...
No Matches
FFmpegReader.cpp
Go to the documentation of this file.
1
11
12// Copyright (c) 2008-2024 OpenShot Studios, LLC, Fabrice Bellard
13//
14// SPDX-License-Identifier: LGPL-3.0-or-later
15
#include <algorithm>
#include <chrono> // for std::chrono::milliseconds
#include <cmath>
#include <cstdint> // for int64_t
#include <cstdlib> // for the 64-bit std::abs overloads
#include <thread> // for std::this_thread::sleep_for
#include <unistd.h>

#include "FFmpegUtilities.h"
#include "effects/CropHelpers.h"

#include "FFmpegReader.h"
#include "Exceptions.h"
#include "MemoryTrim.h"
#include "Timeline.h"
#include "ZmqLogger.h"
30
31#define ENABLE_VAAPI 0
32
33#if USE_HW_ACCEL
34#define MAX_SUPPORTED_WIDTH 1950
35#define MAX_SUPPORTED_HEIGHT 1100
36
37#if ENABLE_VAAPI
38#include "libavutil/hwcontext_vaapi.h"
39
// Local mirror of libavcodec's internal VAAPIDecodeContext.
// NOTE(review): this must match the layout of FFmpeg's private struct exactly,
// because Open() reinterprets pCodecCtx->priv_data through it to reach
// va_config (see the ENABLE_VAAPI block further down). Do not reorder fields.
typedef struct VAAPIDecodeContext {
	VAProfile va_profile;
	VAEntrypoint va_entrypoint;
	VAConfigID va_config;
	VAContextID va_context;

#if FF_API_STRUCT_VAAPI_CONTEXT
	// FF_DISABLE_DEPRECATION_WARNINGS
	// Legacy vaapi_context support fields (deprecated upstream API)
	int have_old_context;
	struct vaapi_context *old_context;
	AVBufferRef *device_ref;
	// FF_ENABLE_DEPRECATION_WARNINGS
#endif

	AVHWDeviceContext *device;
	AVVAAPIDeviceContext *hwctx;

	AVHWFramesContext *frames;
	AVVAAPIFramesContext *hwfc;

	enum AVPixelFormat surface_format;
	int surface_count;
	} VAAPIDecodeContext;
63#endif // ENABLE_VAAPI
64#endif // USE_HW_ACCEL
65
66
using namespace openshot;

// Non-zero when hardware-accelerated decoding is in use. Read by Open()
// when choosing the decode path; its assignment is not visible in this chunk
// (the line appears to have been lost in extraction) — presumably it is set
// from Settings::HARDWARE_DECODER; verify against the original source.
int hw_de_on = 0;
#if USE_HW_ACCEL
	// Written by get_hw_dec_format() when FFmpeg negotiates a hardware
	// pixel format; record which format/device type was selected.
	AVPixelFormat hw_de_av_pix_fmt_global = AV_PIX_FMT_NONE;
	AVHWDeviceType hw_de_av_device_type_global = AV_HWDEVICE_TYPE_NONE;
#endif
74
// Convenience constructor: delegates to the main constructor using the
// default duration strategy (prefer the video stream's duration).
FFmpegReader::FFmpegReader(const std::string &path, bool inspect_reader)
		: FFmpegReader(path, DurationStrategy::VideoPreferred, inspect_reader) {}
77
// Main constructor: initializes all reader state, sizes the caches, and
// (optionally) opens and closes the file once to populate the info struct.
FFmpegReader::FFmpegReader(const std::string &path, DurationStrategy duration_strategy, bool inspect_reader)
		: last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
		  path(path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
		  seek_audio_frame_found(0), seek_video_frame_found(0),is_duration_known(false), largest_frame_processed(0),
		  current_video_frame(0), packet(NULL), duration_strategy(duration_strategy),
		  audio_pts(0), video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
		  pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
		  hold_packet(false) {

	// Initialize FFMpeg, and register all formats and codecs
	// NOTE(review): the registration call itself (original lines 87-89) was
	// lost in extraction of this file — confirm against the original source.

	// Init timestamp offsets to the "unknown" sentinel (NO_PTS_OFFSET)
	pts_offset_seconds = NO_PTS_OFFSET;
	video_pts_seconds = NO_PTS_OFFSET;
	audio_pts_seconds = NO_PTS_OFFSET;

	// Init cache: working cache holds ~2 seconds of frames; final cache 24 frames
	working_cache.SetMaxBytesFromInfo(info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels);
	final_cache.SetMaxBytesFromInfo(24, info.width, info.height, info.sample_rate, info.channels);

	// Open and Close the reader, to populate its attributes (such as height, width, etc...)
	if (inspect_reader) {
		Open();
		Close();
	}
}
106
	// Destructor body — NOTE(review): the "FFmpegReader::~FFmpegReader() {"
	// signature line was lost in extraction of this file.
	if (is_open)
		// Auto close reader if not already done
		Close();
}
112
113// This struct holds the associated video frame and starting sample # for an audio packet.
114bool AudioLocation::is_near(AudioLocation location, int samples_per_frame, int64_t amount) {
115 // Is frame even close to this one?
116 if (abs(location.frame - frame) >= 2)
117 // This is too far away to be considered
118 return false;
119
120 // Note that samples_per_frame can vary slightly frame to frame when the
121 // audio sampling rate is not an integer multiple of the video fps.
122 int64_t diff = samples_per_frame * (location.frame - frame) + location.sample_start - sample_start;
123 if (abs(diff) <= amount)
124 // close
125 return true;
126
127 // not close
128 return false;
129}
130
131#if USE_HW_ACCEL
132
133// Get hardware pix format
134static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
135{
136 const enum AVPixelFormat *p;
137
138 for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
139 switch (*p) {
140#if defined(__linux__)
141 // Linux pix formats
142 case AV_PIX_FMT_VAAPI:
143 hw_de_av_pix_fmt_global = AV_PIX_FMT_VAAPI;
144 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_VAAPI;
145 return *p;
146 break;
147 case AV_PIX_FMT_VDPAU:
148 hw_de_av_pix_fmt_global = AV_PIX_FMT_VDPAU;
149 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_VDPAU;
150 return *p;
151 break;
152#endif
153#if defined(_WIN32)
154 // Windows pix formats
155 case AV_PIX_FMT_DXVA2_VLD:
156 hw_de_av_pix_fmt_global = AV_PIX_FMT_DXVA2_VLD;
157 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_DXVA2;
158 return *p;
159 break;
160 case AV_PIX_FMT_D3D11:
161 hw_de_av_pix_fmt_global = AV_PIX_FMT_D3D11;
162 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_D3D11VA;
163 return *p;
164 break;
165#endif
166#if defined(__APPLE__)
167 // Apple pix formats
168 case AV_PIX_FMT_VIDEOTOOLBOX:
169 hw_de_av_pix_fmt_global = AV_PIX_FMT_VIDEOTOOLBOX;
170 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
171 return *p;
172 break;
173#endif
174 // Cross-platform pix formats
175 case AV_PIX_FMT_CUDA:
176 hw_de_av_pix_fmt_global = AV_PIX_FMT_CUDA;
177 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_CUDA;
178 return *p;
179 break;
180 case AV_PIX_FMT_QSV:
181 hw_de_av_pix_fmt_global = AV_PIX_FMT_QSV;
182 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_QSV;
183 return *p;
184 break;
185 default:
186 // This is only here to silence unused-enum warnings
187 break;
188 }
189 }
190 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::get_hw_dec_format (Unable to decode this file using hardware decode)");
191 return AV_PIX_FMT_NONE;
192}
193
194int FFmpegReader::IsHardwareDecodeSupported(int codecid)
195{
196 int ret;
197 switch (codecid) {
198 case AV_CODEC_ID_H264:
199 case AV_CODEC_ID_MPEG2VIDEO:
200 case AV_CODEC_ID_VC1:
201 case AV_CODEC_ID_WMV1:
202 case AV_CODEC_ID_WMV2:
203 case AV_CODEC_ID_WMV3:
204 ret = 1;
205 break;
206 default :
207 ret = 0;
208 break;
209 }
210 return ret;
211}
212#endif // USE_HW_ACCEL
213
	// Open() body — NOTE(review): the "void FFmpegReader::Open() {" signature
	// line was lost in extraction of this file. Several interior lines are
	// also missing; each gap is marked with a NOTE(review) comment below.
	// Open reader if not already open
	if (!is_open) {
		// Prevent async calls to the following code
		const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

		// Initialize format context
		pFormatCtx = NULL;
		{
			// NOTE(review): a line was lost here in extraction — presumably the
			// assignment of hw_de_on from Settings::HARDWARE_DECODER; confirm
			// against the original source.
			ZmqLogger::Instance()->AppendDebugMethod("Decode hardware acceleration settings", "hw_de_on", hw_de_on, "HARDWARE_DECODER", openshot::Settings::Instance()->HARDWARE_DECODER);
		}

		// Open video file
		if (avformat_open_input(&pFormatCtx, path.c_str(), NULL, NULL) != 0)
			throw InvalidFile("File could not be opened.", path);

		// Retrieve stream information
		if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
			throw NoStreamsFound("No streams found in file.", path);

		videoStream = -1;
		audioStream = -1;

		// Init end-of-file detection variables
		packet_status.reset(true);

		// Loop through each stream, and identify the video and audio stream index
		for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
			// Is this a video stream?
			if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
				videoStream = i;
				packet_status.video_eof = false;
				packet_status.packets_eof = false;
				packet_status.end_of_file = false;
			}
			// Is this an audio stream?
			if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
				audioStream = i;
				packet_status.audio_eof = false;
				packet_status.packets_eof = false;
				packet_status.end_of_file = false;
			}
		}
		if (videoStream == -1 && audioStream == -1)
			throw NoStreamsFound("No video or audio streams found in this file.", path);

		// Is there a video stream?
		if (videoStream != -1) {
			// Set the stream index
			info.video_stream_index = videoStream;

			// Set the codec and codec context pointers
			pStream = pFormatCtx->streams[videoStream];

			// Find the codec ID from stream
			const AVCodecID codecId = AV_FIND_DECODER_CODEC_ID(pStream);

			// Get codec and codec context from stream
			const AVCodec *pCodec = avcodec_find_decoder(codecId);
			AVDictionary *opts = NULL;
			int retry_decode_open = 2;
			// If hw accel is selected but hardware cannot handle repeat with software decoding
			do {
				pCodecCtx = AV_GET_CODEC_CONTEXT(pStream, pCodec);
#if USE_HW_ACCEL
				if (hw_de_on && (retry_decode_open==2)) {
					// Up to here no decision is made if hardware or software decode
					hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
				}
#endif
				retry_decode_open = 0;

				// Set number of threads equal to number of processors (not to exceed 16)
				pCodecCtx->thread_count = std::min(FF_VIDEO_NUM_PROCESSORS, 16);

				if (pCodec == NULL) {
					throw InvalidCodec("A valid video codec could not be found for this file.", path);
				}

				// Init options
				av_dict_set(&opts, "strict", "experimental", 0);
#if USE_HW_ACCEL
				if (hw_de_on && hw_de_supported) {
					// Open Hardware Acceleration
					int i_decoder_hw = 0;
					char adapter[256];
					char *adapter_ptr = NULL;
					int adapter_num;
					// NOTE(review): the assignment of adapter_num was lost in
					// extraction — presumably read from Settings; as shown it is
					// used uninitialized below. Confirm against the original.
					fprintf(stderr, "Hardware decoding device number: %d\n", adapter_num);

					// Set hardware pix format (callback)
					pCodecCtx->get_format = get_hw_dec_format;

					if (adapter_num < 3 && adapter_num >=0) {
#if defined(__linux__)
						snprintf(adapter,sizeof(adapter),"/dev/dri/renderD%d", adapter_num+128);
						adapter_ptr = adapter;
						// NOTE(review): the assignment of i_decoder_hw was lost
						// in extraction (likely from Settings); confirm.
						switch (i_decoder_hw) {
							case 1:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
								break;
							case 2:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
								break;
							case 6:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
								break;
							case 7:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
								break;
							default:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
								break;
						}

#elif defined(_WIN32)
						adapter_ptr = NULL;
						// NOTE(review): the assignment of i_decoder_hw was lost
						// in extraction (likely from Settings); confirm.
						switch (i_decoder_hw) {
							case 2:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
								break;
							case 3:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
								break;
							case 4:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
								break;
							case 7:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
								break;
							default:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
								break;
						}
#elif defined(__APPLE__)
						adapter_ptr = NULL;
						// NOTE(review): the assignment of i_decoder_hw was lost
						// in extraction (likely from Settings); confirm.
						switch (i_decoder_hw) {
							case 5:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
								break;
							case 7:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
								break;
							default:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
								break;
						}
#endif

					} else {
						adapter_ptr = NULL; // Just to be sure
					}

					// Check if it is there and writable
#if defined(__linux__)
					if( adapter_ptr != NULL && access( adapter_ptr, W_OK ) == 0 ) {
#elif defined(_WIN32)
					if( adapter_ptr != NULL ) {
#elif defined(__APPLE__)
					if( adapter_ptr != NULL ) {
#endif
						ZmqLogger::Instance()->AppendDebugMethod("Decode Device present using device");
					}
					else {
						adapter_ptr = NULL; // use default
						ZmqLogger::Instance()->AppendDebugMethod("Decode Device not present using default");
					}

					hw_device_ctx = NULL;
					// Here the first hardware initialisations are made
					if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
						if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
							throw InvalidCodec("Hardware device reference create failed.", path);
						}

						/*
						av_buffer_unref(&ist->hw_frames_ctx);
						ist->hw_frames_ctx = av_hwframe_ctx_alloc(hw_device_ctx);
						if (!ist->hw_frames_ctx) {
							av_log(avctx, AV_LOG_ERROR, "Error creating a CUDA frames context\n");
							return AVERROR(ENOMEM);
						}

						frames_ctx = (AVHWFramesContext*)ist->hw_frames_ctx->data;

						frames_ctx->format = AV_PIX_FMT_CUDA;
						frames_ctx->sw_format = avctx->sw_pix_fmt;
						frames_ctx->width = avctx->width;
						frames_ctx->height = avctx->height;

						av_log(avctx, AV_LOG_DEBUG, "Initializing CUDA frames context: sw_format = %s, width = %d, height = %d\n",
							av_get_pix_fmt_name(frames_ctx->sw_format), frames_ctx->width, frames_ctx->height);


						ret = av_hwframe_ctx_init(pCodecCtx->hw_device_ctx);
						ret = av_hwframe_ctx_init(ist->hw_frames_ctx);
						if (ret < 0) {
							av_log(avctx, AV_LOG_ERROR, "Error initializing a CUDA frame pool\n");
							return ret;
						}
						*/
					}
					else {
						throw InvalidCodec("Hardware device create failed.", path);
					}
				}
#endif // USE_HW_ACCEL

				// Disable per-frame threading for album arts
				// Using FF_THREAD_FRAME adds one frame decoding delay per thread,
				// but there's only one frame in this case.
				if (HasAlbumArt())
				{
					pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
				}

				// Open video codec
				int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &opts);
				if (avcodec_return < 0) {
					std::stringstream avcodec_error_msg;
					avcodec_error_msg << "A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
					throw InvalidCodec(avcodec_error_msg.str(), path);
				}

#if USE_HW_ACCEL
				if (hw_de_on && hw_de_supported) {
					AVHWFramesConstraints *constraints = NULL;
					void *hwconfig = NULL;
					hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);

// TODO: needs va_config!
#if ENABLE_VAAPI
					((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
					constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx,hwconfig);
#endif // ENABLE_VAAPI
					if (constraints) {
						// Verify the coded size fits within the device's limits
						if (pCodecCtx->coded_width < constraints->min_width ||
							pCodecCtx->coded_height < constraints->min_height ||
							pCodecCtx->coded_width > constraints->max_width ||
							pCodecCtx->coded_height > constraints->max_height) {
							ZmqLogger::Instance()->AppendDebugMethod("DIMENSIONS ARE TOO LARGE for hardware acceleration\n");
							hw_de_supported = 0;
							retry_decode_open = 1;
							AV_FREE_CONTEXT(pCodecCtx);
							if (hw_device_ctx) {
								av_buffer_unref(&hw_device_ctx);
								hw_device_ctx = NULL;
							}
						}
						else {
							// All is just peachy
							ZmqLogger::Instance()->AppendDebugMethod("\nDecode hardware acceleration is used\n", "Min width :", constraints->min_width, "Min Height :", constraints->min_height, "MaxWidth :", constraints->max_width, "MaxHeight :", constraints->max_height, "Frame width :", pCodecCtx->coded_width, "Frame height :", pCodecCtx->coded_height);
							retry_decode_open = 0;
						}
						av_hwframe_constraints_free(&constraints);
						if (hwconfig) {
							av_freep(&hwconfig);
						}
					}
					else {
						int max_h, max_w;
						//max_h = ((getenv( "LIMIT_HEIGHT_MAX" )==NULL) ? MAX_SUPPORTED_HEIGHT : atoi(getenv( "LIMIT_HEIGHT_MAX" )));
						// NOTE(review): the actual assignments of max_h and max_w
						// were lost in extraction — as shown they are used
						// uninitialized below. Confirm against the original.
						//max_w = ((getenv( "LIMIT_WIDTH_MAX" )==NULL) ? MAX_SUPPORTED_WIDTH : atoi(getenv( "LIMIT_WIDTH_MAX" )));
						ZmqLogger::Instance()->AppendDebugMethod("Constraints could not be found using default limit\n");
						//cerr << "Constraints could not be found using default limit\n";
						if (pCodecCtx->coded_width < 0 ||
							pCodecCtx->coded_height < 0 ||
							pCodecCtx->coded_width > max_w ||
							pCodecCtx->coded_height > max_h ) {
							ZmqLogger::Instance()->AppendDebugMethod("DIMENSIONS ARE TOO LARGE for hardware acceleration\n", "Max Width :", max_w, "Max Height :", max_h, "Frame width :", pCodecCtx->coded_width, "Frame height :", pCodecCtx->coded_height);
							hw_de_supported = 0;
							retry_decode_open = 1;
							AV_FREE_CONTEXT(pCodecCtx);
							if (hw_device_ctx) {
								av_buffer_unref(&hw_device_ctx);
								hw_device_ctx = NULL;
							}
						}
						else {
							ZmqLogger::Instance()->AppendDebugMethod("\nDecode hardware acceleration is used\n", "Max Width :", max_w, "Max Height :", max_h, "Frame width :", pCodecCtx->coded_width, "Frame height :", pCodecCtx->coded_height);
							retry_decode_open = 0;
						}
					}
				} // if hw_de_on && hw_de_supported
				else {
					ZmqLogger::Instance()->AppendDebugMethod("\nDecode in software is used\n");
				}
#else
				retry_decode_open = 0;
#endif // USE_HW_ACCEL
			} while (retry_decode_open); // retry_decode_open
			// Free options
			av_dict_free(&opts);

			// Update the File Info struct with video details (if a video stream is found)
			UpdateVideoInfo();
		}

		// Is there an audio stream?
		if (audioStream != -1) {
			// Set the stream index
			info.audio_stream_index = audioStream;

			// Get a pointer to the codec context for the audio stream
			aStream = pFormatCtx->streams[audioStream];

			// Find the codec ID from stream
			AVCodecID codecId = AV_FIND_DECODER_CODEC_ID(aStream);

			// Get codec and codec context from stream
			const AVCodec *aCodec = avcodec_find_decoder(codecId);
			aCodecCtx = AV_GET_CODEC_CONTEXT(aStream, aCodec);

			// Audio encoding does not typically use more than 2 threads (most codecs use 1 thread)
			aCodecCtx->thread_count = std::min(FF_AUDIO_NUM_PROCESSORS, 2);

			if (aCodec == NULL) {
				throw InvalidCodec("A valid audio codec could not be found for this file.", path);
			}

			// Init options
			AVDictionary *opts = NULL;
			av_dict_set(&opts, "strict", "experimental", 0);

			// Open audio codec
			if (avcodec_open2(aCodecCtx, aCodec, &opts) < 0)
				throw InvalidCodec("An audio codec was found, but could not be opened.", path);

			// Free options
			av_dict_free(&opts);

			// Update the File Info struct with audio details (if an audio stream is found)
			UpdateAudioInfo();
		}

		// Add format metadata (if any)
		AVDictionaryEntry *tag = NULL;
		while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
			QString str_key = tag->key;
			QString str_value = tag->value;
			info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
		}

		// Process video stream side data (rotation, spherical metadata, etc)
		for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
			AVStream* st = pFormatCtx->streams[i];
			if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
				// Only inspect the first video stream
#if (LIBAVFORMAT_VERSION_MAJOR < 62)
				// Older FFmpeg: side data lives on the stream itself
				for (int j = 0; j < st->nb_side_data; j++) {
					AVPacketSideData *sd = &st->side_data[j];
#else
				// Newer FFmpeg: side data moved to codecpar->coded_side_data
				for (int j = 0; j < st->codecpar->nb_coded_side_data; j++) {
					AVPacketSideData *sd = &st->codecpar->coded_side_data[j];
#endif
					// Handle rotation metadata (unchanged)
					if (sd->type == AV_PKT_DATA_DISPLAYMATRIX &&
						sd->size >= 9 * sizeof(int32_t) &&
						!info.metadata.count("rotate"))
					{
						double rotation = -av_display_rotation_get(
							reinterpret_cast<int32_t *>(sd->data));
						if (std::isnan(rotation)) rotation = 0;
						info.metadata["rotate"] = std::to_string(rotation);
					}
					// Handle spherical video metadata
					else if (sd->type == AV_PKT_DATA_SPHERICAL) {
						// Always mark as spherical
						info.metadata["spherical"] = "1";

						// Cast the raw bytes to an AVSphericalMapping
						const AVSphericalMapping* map =
							reinterpret_cast<const AVSphericalMapping*>(sd->data);

						// Projection enum → string
						const char* proj_name = av_spherical_projection_name(map->projection);
						info.metadata["spherical_projection"] = proj_name
							? proj_name
							: "unknown";

						// Convert 16.16 fixed-point to float degrees
						auto to_deg = [](int32_t v){
							return (double)v / 65536.0;
						};
						info.metadata["spherical_yaw"] = std::to_string(to_deg(map->yaw));
						info.metadata["spherical_pitch"] = std::to_string(to_deg(map->pitch));
						info.metadata["spherical_roll"] = std::to_string(to_deg(map->roll));
					}
				}
				break;
			}
		}

		// Init previous audio location to zero
		previous_packet_location.frame = -1;
		previous_packet_location.sample_start = 0;

		// Adjust cache size based on size of frame and audio
		working_cache.SetMaxBytesFromInfo(info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels);
		final_cache.SetMaxBytesFromInfo(24, info.width, info.height, info.sample_rate, info.channels);

		// Scan PTS for any offsets (i.e. non-zero starting streams). At least 1 stream must start at zero timestamp.
		// This method allows us to shift timestamps to ensure at least 1 stream is starting at zero.
		UpdatePTSOffset();

		// Override an invalid framerate
		if (info.fps.ToFloat() > 240.0f || (info.fps.num <= 0 || info.fps.den <= 0) || info.video_length <= 0) {
			// Calculate FPS, duration, video bit rate, and video length manually
			// by scanning through all the video stream packets
			CheckFPS();
		}

		// Mark as "open"
		is_open = true;

		// Seek back to beginning of file (if not already seeking)
		if (!is_seeking) {
			Seek(1);
		}
	}
}
642
	// Close() body — NOTE(review): the "void FFmpegReader::Close() {"
	// signature line was lost in extraction of this file.
	// Close all objects, if reader is 'open'
	if (is_open) {
		// Prevent async calls to the following code
		const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

		// Mark as "closed"
		is_open = false;

		// Keep track of most recent packet
		AVPacket *recent_packet = packet;

		// Drain any packets from the decoder (bounded by max_attempts so a
		// stuck decoder cannot loop forever)
		packet = NULL;
		int attempts = 0;
		int max_attempts = 128;
		while (packet_status.packets_decoded() < packet_status.packets_read() && attempts < max_attempts) {
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::Close (Drain decoder loop)",
					"packets_read", packet_status.packets_read(),
					"packets_decoded", packet_status.packets_decoded(),
					"attempts", attempts);
			if (packet_status.video_decoded < packet_status.video_read) {
				ProcessVideoPacket(info.video_length);
			}
			if (packet_status.audio_decoded < packet_status.audio_read) {
				ProcessAudioPacket(info.video_length);
			}
			attempts++;
		}

		// Remove packet
		if (recent_packet) {
			RemoveAVPacket(recent_packet);
		}

		// Close the video codec
		if (info.has_video) {
			if(avcodec_is_open(pCodecCtx)) {
				avcodec_flush_buffers(pCodecCtx);
			}
			AV_FREE_CONTEXT(pCodecCtx);
#if USE_HW_ACCEL
			// Release the hardware device context (if hardware decode was used)
			if (hw_de_on) {
				if (hw_device_ctx) {
					av_buffer_unref(&hw_device_ctx);
					hw_device_ctx = NULL;
				}
			}
#endif // USE_HW_ACCEL
			// Free the software-scaler context and the cached RGB frame
			if (img_convert_ctx) {
				sws_freeContext(img_convert_ctx);
				img_convert_ctx = nullptr;
			}
			if (pFrameRGB_cached) {
				AV_FREE_FRAME(&pFrameRGB_cached);
			}
		}

		// Close the audio codec
		if (info.has_audio) {
			if(avcodec_is_open(aCodecCtx)) {
				avcodec_flush_buffers(aCodecCtx);
			}
			AV_FREE_CONTEXT(aCodecCtx);
			// Free the audio resampler context
			if (avr_ctx) {
				SWR_CLOSE(avr_ctx);
				SWR_FREE(&avr_ctx);
				avr_ctx = nullptr;
			}
		}

		// Clear final cache
		final_cache.Clear();
		working_cache.Clear();

		// Close the video file
		avformat_close_input(&pFormatCtx);
		av_freep(&pFormatCtx);

		// Release free’d arenas back to OS after heavy teardown
		TrimMemoryToOS(true);

		// Reset some variables
		last_frame = 0;
		hold_packet = false;
		largest_frame_processed = 0;
		seek_audio_frame_found = 0;
		seek_video_frame_found = 0;
		current_video_frame = 0;
		last_video_frame.reset();
	}
}
735
736bool FFmpegReader::HasAlbumArt() {
737 // Check if the video stream we use is an attached picture
738 // This won't return true if the file has a cover image as a secondary stream
739 // like an MKV file with an attached image file
740 return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
741 && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
742}
743
744double FFmpegReader::PickDurationSeconds() const {
745 auto has_value = [](double value) { return value > 0.0; };
746
747 switch (duration_strategy) {
749 if (has_value(video_stream_duration_seconds))
750 return video_stream_duration_seconds;
751 if (has_value(audio_stream_duration_seconds))
752 return audio_stream_duration_seconds;
753 if (has_value(format_duration_seconds))
754 return format_duration_seconds;
755 break;
757 if (has_value(audio_stream_duration_seconds))
758 return audio_stream_duration_seconds;
759 if (has_value(video_stream_duration_seconds))
760 return video_stream_duration_seconds;
761 if (has_value(format_duration_seconds))
762 return format_duration_seconds;
763 break;
765 default:
766 {
767 double longest = 0.0;
768 if (has_value(video_stream_duration_seconds))
769 longest = std::max(longest, video_stream_duration_seconds);
770 if (has_value(audio_stream_duration_seconds))
771 longest = std::max(longest, audio_stream_duration_seconds);
772 if (has_value(format_duration_seconds))
773 longest = std::max(longest, format_duration_seconds);
774 if (has_value(longest))
775 return longest;
776 }
777 break;
778 }
779
780 if (has_value(format_duration_seconds))
781 return format_duration_seconds;
782 if (has_value(inferred_duration_seconds))
783 return inferred_duration_seconds;
784
785 return 0.0;
786}
787
788void FFmpegReader::ApplyDurationStrategy() {
789 const double fps_value = info.fps.ToDouble();
790 const double chosen_seconds = PickDurationSeconds();
791
792 if (chosen_seconds <= 0.0 || fps_value <= 0.0) {
793 info.duration = 0.0f;
794 info.video_length = 0;
795 is_duration_known = false;
796 return;
797 }
798
799 const int64_t frames = static_cast<int64_t>(std::llround(chosen_seconds * fps_value));
800 if (frames <= 0) {
801 info.duration = 0.0f;
802 info.video_length = 0;
803 is_duration_known = false;
804 return;
805 }
806
807 info.video_length = frames;
808 info.duration = static_cast<float>(static_cast<double>(frames) / fps_value);
809 is_duration_known = true;
810}
811
// Populate the FileInfo struct with audio details from the opened audio
// stream (channels, layout, sample rate, bit rate, timebase, durations),
// and default video settings when no video stream exists.
void FFmpegReader::UpdateAudioInfo() {
	// Set default audio channel layout (if needed)
#if HAVE_CH_LAYOUT
	if (!av_channel_layout_check(&(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout)))
		AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout = (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
#else
	if (AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout == 0)
		AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout = av_get_default_channel_layout(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels);
#endif

	if (info.sample_rate > 0) {
		// Skip init - if info struct already populated
		return;
	}

	// Keep the largest positive duration candidate seen so far
	auto record_duration = [](double &target, double seconds) {
		if (seconds > 0.0)
			target = std::max(target, seconds);
	};

	// Set values of FileInfo struct
	info.has_audio = true;
	info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
	info.acodec = aCodecCtx->codec->name;
#if HAVE_CH_LAYOUT
	// Newer FFmpeg channel-layout API
	info.channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
	info.channel_layout = (ChannelLayout) AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.u.mask;
#else
	info.channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels;
	info.channel_layout = (ChannelLayout) AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout;
#endif

	// If channel layout is not set, guess based on the number of channels
	if (info.channel_layout == 0) {
		if (info.channels == 1) {
			info.channel_layout = openshot::LAYOUT_MONO;
		} else if (info.channels == 2) {
			info.channel_layout = openshot::LAYOUT_STEREO;
		}
	}

	info.sample_rate = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->sample_rate;
	info.audio_bit_rate = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->bit_rate;
	if (info.audio_bit_rate <= 0) {
		// Get bitrate from format
		info.audio_bit_rate = pFormatCtx->bit_rate;
	}

	// Set audio timebase
	info.audio_timebase.num = aStream->time_base.num;
	info.audio_timebase.den = aStream->time_base.den;

	// Get timebase of audio stream (if valid) and greater than the current duration
	if (aStream->duration > 0) {
		record_duration(audio_stream_duration_seconds, aStream->duration * info.audio_timebase.ToDouble());
	}
	if (pFormatCtx->duration > 0) {
		// Use the format's duration when stream duration is missing or shorter
		record_duration(format_duration_seconds, static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
	}

	// Calculate duration from filesize and bitrate (if any)
	// NOTE(review): this audio path reads info.video_bit_rate (not
	// audio_bit_rate) — looks intentional since video_bit_rate holds the
	// container bitrate, but confirm against the original source.
	if (info.duration <= 0.0f && info.video_bit_rate > 0 && info.file_size > 0) {
		// Estimate from bitrate, total bytes, and framerate
		record_duration(inferred_duration_seconds, static_cast<double>(info.file_size) / info.video_bit_rate);
	}

	// Set video timebase (if no video stream was found)
	if (!info.has_video) {
		// Set a few important default video settings (so audio can be divided into frames)
		info.fps.num = 30;
		info.fps.den = 1;
		info.video_timebase.num = 1;
		info.video_timebase.den = 30;
		info.width = 720;
		info.height = 480;

		// Use timeline to set correct width & height (if any)
		Clip *parent = static_cast<Clip *>(ParentClip());
		if (parent) {
			if (parent->ParentTimeline()) {
				// Set max width/height based on parent clip's timeline (if attached to a timeline)
				info.width = parent->ParentTimeline()->preview_width;
				info.height = parent->ParentTimeline()->preview_height;
			}
		}
	}

	// Convert the recorded duration candidates into duration/video_length
	ApplyDurationStrategy();

	// Add audio metadata (if any found)
	AVDictionaryEntry *tag = NULL;
	while ((tag = av_dict_get(aStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
		QString str_key = tag->key;
		QString str_value = tag->value;
		info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
	}
}
910
911void FFmpegReader::UpdateVideoInfo() {
912 if (info.vcodec.length() > 0) {
913 // Skip init - if info struct already populated
914 return;
915 }
916
917 auto record_duration = [](double &target, double seconds) {
918 if (seconds > 0.0)
919 target = std::max(target, seconds);
920 };
921
922 // Set values of FileInfo struct
923 info.has_video = true;
924 info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
925 info.height = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->height;
926 info.width = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->width;
927 info.vcodec = pCodecCtx->codec->name;
928 info.video_bit_rate = (pFormatCtx->bit_rate / 8);
929
930 // Frame rate from the container and codec
931 AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
932 if (!check_fps) {
933 info.fps.num = framerate.num;
934 info.fps.den = framerate.den;
935 }
936
937 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::UpdateVideoInfo", "info.fps.num", info.fps.num, "info.fps.den", info.fps.den);
938
939 // TODO: remove excessive debug info in the next releases
940 // The debug info below is just for comparison and troubleshooting on users side during the transition period
941 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::UpdateVideoInfo (pStream->avg_frame_rate)", "num", pStream->avg_frame_rate.num, "den", pStream->avg_frame_rate.den);
942
943 if (pStream->sample_aspect_ratio.num != 0) {
944 info.pixel_ratio.num = pStream->sample_aspect_ratio.num;
945 info.pixel_ratio.den = pStream->sample_aspect_ratio.den;
946 } else if (AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->sample_aspect_ratio.num != 0) {
947 info.pixel_ratio.num = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->sample_aspect_ratio.num;
948 info.pixel_ratio.den = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->sample_aspect_ratio.den;
949 } else {
950 info.pixel_ratio.num = 1;
951 info.pixel_ratio.den = 1;
952 }
953 info.pixel_format = AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx);
954
955 // Calculate the DAR (display aspect ratio)
956 Fraction size(info.width * info.pixel_ratio.num, info.height * info.pixel_ratio.den);
957
958 // Reduce size fraction
959 size.Reduce();
960
961 // Set the ratio based on the reduced fraction
962 info.display_ratio.num = size.num;
963 info.display_ratio.den = size.den;
964
965 // Get scan type and order from codec context/params
966 if (!check_interlace) {
967 check_interlace = true;
968 AVFieldOrder field_order = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->field_order;
969 switch(field_order) {
970 case AV_FIELD_PROGRESSIVE:
971 info.interlaced_frame = false;
972 break;
973 case AV_FIELD_TT:
974 case AV_FIELD_TB:
975 info.interlaced_frame = true;
976 info.top_field_first = true;
977 break;
978 case AV_FIELD_BT:
979 case AV_FIELD_BB:
980 info.interlaced_frame = true;
981 info.top_field_first = false;
982 break;
983 case AV_FIELD_UNKNOWN:
984 // Check again later?
985 check_interlace = false;
986 break;
987 }
988 // check_interlace will prevent these checks being repeated,
989 // unless it was cleared because we got an AV_FIELD_UNKNOWN response.
990 }
991
992 // Set the video timebase
993 info.video_timebase.num = pStream->time_base.num;
994 info.video_timebase.den = pStream->time_base.den;
995
996 // Set the duration in seconds, and video length (# of frames)
997 record_duration(video_stream_duration_seconds, pStream->duration * info.video_timebase.ToDouble());
998
999 // Check for valid duration (if found)
1000 if (pFormatCtx->duration >= 0) {
1001 // Use the format's duration as another candidate
1002 record_duration(format_duration_seconds, static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
1003 }
1004
1005 // Calculate duration from filesize and bitrate (if any)
1006 if (info.video_bit_rate > 0 && info.file_size > 0) {
1007 // Estimate from bitrate, total bytes, and framerate
1008 record_duration(inferred_duration_seconds, static_cast<double>(info.file_size) / info.video_bit_rate);
1009 }
1010
1011 // Certain "image" formats do not have a valid duration
1012 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1013 pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
1014 // Force an "image" duration
1015 record_duration(video_stream_duration_seconds, 60 * 60 * 1); // 1 hour duration
1016 info.has_single_image = true;
1017 }
1018 // Static GIFs can have no usable duration; fall back to a small default
1019 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1020 pFormatCtx && pFormatCtx->iformat && strcmp(pFormatCtx->iformat->name, "gif") == 0) {
1021 record_duration(video_stream_duration_seconds, 60 * 60 * 1); // 1 hour duration
1022 info.has_single_image = true;
1023 }
1024
1025 ApplyDurationStrategy();
1026
1027 // Add video metadata (if any)
1028 AVDictionaryEntry *tag = NULL;
1029 while ((tag = av_dict_get(pStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
1030 QString str_key = tag->key;
1031 QString str_value = tag->value;
1032 info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
1033 }
1034}
1035
1037 return this->is_duration_known;
1038}
1039
std::shared_ptr<Frame> FFmpegReader::GetFrame(int64_t requested_frame) {
	// Return the requested frame, serving it from the final cache when possible,
	// and otherwise decoding forward (optionally after a seek) until it is produced.

	// Check for open reader (or throw exception)
	if (!is_open)
		throw ReaderClosed("The FFmpegReader is closed. Call Open() before calling this method.", path);

	// Clamp the requested frame into the valid range [1, video_length]
	if (requested_frame < 1)
		requested_frame = 1;
	if (requested_frame > info.video_length && is_duration_known)
		requested_frame = info.video_length;
	if (info.has_video && info.video_length == 0)
		// Invalid duration of video file
		throw InvalidFile("Could not detect the duration of the video or audio stream.", path);

	// Debug output
	ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetFrame", "requested_frame", requested_frame, "last_frame", last_frame);

	// Fast path: check the final cache for this frame before taking the lock
	std::shared_ptr<Frame> frame = final_cache.GetFrame(requested_frame);
	if (frame) {
		// Debug output
		ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetFrame", "returned cached frame", requested_frame);

		// Return the cached frame
		return frame;
	} else {

		// Prevent async calls to the remainder of this code
		const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

		// Check the cache a 2nd time (another caller may have produced this frame
		// while we were waiting to acquire the lock)
		frame = final_cache.GetFrame(requested_frame);
		if (frame) {
			// Debug output
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetFrame", "returned cached frame on 2nd look", requested_frame);

		} else {
			// Frame is not in cache
			// Reset seek count
			seek_count = 0;

			// Are we within X frames of the requested frame?
			int64_t diff = requested_frame - last_frame;
			if (diff >= 1 && diff <= 20) {
				// Close enough ahead of our current position: just continue
				// walking the stream (cheaper than a seek)
				frame = ReadStream(requested_frame);
			} else {
				// More than 20 frames ahead, or behind our current position:
				// seek to the nearest key frame before decoding forward
				if (enable_seek) {
					// Only seek if enabled
					Seek(requested_frame);

				} else if (!enable_seek && diff < 0) {
					// Start over, since we can't seek, and the requested frame is smaller than our position
					// Since we are seeking to frame 1, this actually just closes/re-opens the reader
					Seek(1);
				}

				// Then continue walking the stream
				frame = ReadStream(requested_frame);
			}
		}
		return frame;
	}
}
1105
// Read the stream until we find the requested Frame
std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
	// Walk packets forward, dispatching them to the video/audio decoders, until
	// the requested frame lands in final_cache or end-of-file is reached.
	bool check_seek = false;
	int packet_error = -1;

	// Debug output
	ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream", "requested_frame", requested_frame);

	// Loop through the stream until the correct frame is found
	while (true) {
		// Check if working frames are 'finished'
		if (!is_seeking) {
			// Check for final frames (moves completed frames from working_cache to final_cache)
			CheckWorkingFrames(requested_frame);
		}

		// Check if requested 'final' frame is available (and break out of loop if found)
		bool is_cache_found = (final_cache.GetFrame(requested_frame) != NULL);
		if (is_cache_found) {
			break;
		}

		// Only read a new packet when we are not holding one back (a held packet
		// happens when the decoder returned EAGAIN and must be re-sent)
		if (!hold_packet || !packet) {
			// Get the next packet
			packet_error = GetNextPacket();
			if (packet_error < 0 && !packet) {
				// No more packets to be found
				packet_status.packets_eof = true;
			}
		}

		// Debug output
		ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (GetNextPacket)", "requested_frame", requested_frame,"packets_read", packet_status.packets_read(), "packets_decoded", packet_status.packets_decoded(), "is_seeking", is_seeking);

		// Check the status of a seek (if any)
		if (is_seeking) {
			check_seek = CheckSeek(false);
		} else {
			check_seek = false;
		}

		if (check_seek) {
			// Packet may become NULL on Close inside Seek if CheckSeek returns false
			// Jump to the next iteration of this loop
			continue;
		}

		// Video packet: decode if this packet belongs to the video stream, or if
		// decoded frames are still owed, or to drain the decoder after packets end
		if ((info.has_video && packet && packet->stream_index == videoStream) ||
			(info.has_video && packet_status.video_decoded < packet_status.video_read) ||
			(info.has_video && !packet && !packet_status.video_eof)) {
			// Process Video Packet
			ProcessVideoPacket(requested_frame);
		}
		// Audio packet (same pattern as video above)
		// NOTE(review): the 2nd audio clause requires !packet while the matching
		// video clause does not — confirm this asymmetry is intentional.
		if ((info.has_audio && packet && packet->stream_index == audioStream) ||
			(info.has_audio && !packet && packet_status.audio_decoded < packet_status.audio_read) ||
			(info.has_audio && !packet && !packet_status.audio_eof)) {
			// Process Audio Packet
			ProcessAudioPacket(requested_frame);
		}

		// Remove unused packets (sometimes we purposely ignore video or audio packets,
		// if the has_video or has_audio properties are manually overridden)
		if ((!info.has_video && packet && packet->stream_index == videoStream) ||
			(!info.has_audio && packet && packet->stream_index == audioStream)) {
			// Keep track of deleted packet counts
			if (packet->stream_index == videoStream) {
				packet_status.video_decoded++;
			} else if (packet->stream_index == audioStream) {
				packet_status.audio_decoded++;
			}

			// Remove unused packets (sometimes we purposely ignore video or audio packets,
			// if the has_video or has_audio properties are manually overridden)
			RemoveAVPacket(packet);
			packet = NULL;
		}

		// Determine end-of-stream (waiting until final decoder threads finish)
		// Force end-of-stream in some situations
		packet_status.end_of_file = packet_status.packets_eof && packet_status.video_eof && packet_status.audio_eof;
		if ((packet_status.packets_eof && packet_status.packets_read() == packet_status.packets_decoded()) || packet_status.end_of_file) {
			// Force EOF (end of file) variables to true, if decoder does not support EOF detection.
			// If we have no more packets, and all known packets have been decoded
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (force EOF)", "packets_read", packet_status.packets_read(), "packets_decoded", packet_status.packets_decoded(), "packets_eof", packet_status.packets_eof, "video_eof", packet_status.video_eof, "audio_eof", packet_status.audio_eof, "end_of_file", packet_status.end_of_file);
			if (!packet_status.video_eof) {
				packet_status.video_eof = true;
			}
			if (!packet_status.audio_eof) {
				packet_status.audio_eof = true;
			}
			packet_status.end_of_file = true;
			break;
		}
	} // end while

	// Debug output
	ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (Completed)",
											 "packets_read", packet_status.packets_read(),
											 "packets_decoded", packet_status.packets_decoded(),
											 "end_of_file", packet_status.end_of_file,
											 "largest_frame_processed", largest_frame_processed,
											 "Working Cache Count", working_cache.Count());

	// Have we reached end-of-stream (or the final frame)?
	if (!packet_status.end_of_file && requested_frame >= info.video_length) {
		// Force end-of-stream
		packet_status.end_of_file = true;
	}
	if (packet_status.end_of_file) {
		// Mark any other working frames as 'finished'
		CheckWorkingFrames(requested_frame);
	}

	// Return requested frame (if found)
	std::shared_ptr<Frame> frame = final_cache.GetFrame(requested_frame);
	if (frame)
		// Return prepared frame
		return frame;
	else {

		// Check if largest frame is still cached
		frame = final_cache.GetFrame(largest_frame_processed);
		int samples_in_frame = Frame::GetSamplesPerFrame(requested_frame, info.fps,
														 info.sample_rate, info.channels);
		if (frame) {
			// Copy and return the largest processed frame (assuming it was the last in the video file)
			std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);

			// NOTE(review): 'f' is created (and painted black below) but 'frame' is
			// what gets returned, so 'f' appears to be discarded — confirm intent.
			// Use solid color (if no image data found)
			if (!frame->has_image_data) {
				// Use solid black frame if no image data available
				f->AddColor(info.width, info.height, "#000");
			}
			// Silence audio data (if any), since we are repeating the last frame
			frame->AddAudioSilence(samples_in_frame);

			return frame;
		} else {
			// The largest processed frame is no longer in cache, return a blank frame
			std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
			f->AddColor(info.width, info.height, "#000");
			f->AddAudioSilence(samples_in_frame);
			return f;
		}
	}

}
1256
1257// Get the next packet (if any)
1258int FFmpegReader::GetNextPacket() {
1259 int found_packet = 0;
1260 AVPacket *next_packet;
1261 next_packet = new AVPacket();
1262 found_packet = av_read_frame(pFormatCtx, next_packet);
1263
1264 if (packet) {
1265 // Remove previous packet before getting next one
1266 RemoveAVPacket(packet);
1267 packet = NULL;
1268 }
1269 if (found_packet >= 0) {
1270 // Update current packet pointer
1271 packet = next_packet;
1272
1273 // Keep track of packet stats
1274 if (packet->stream_index == videoStream) {
1275 packet_status.video_read++;
1276 } else if (packet->stream_index == audioStream) {
1277 packet_status.audio_read++;
1278 }
1279 } else {
1280 // No more packets found
1281 delete next_packet;
1282 packet = NULL;
1283 }
1284 // Return if packet was found (or error number)
1285 return found_packet;
1286}
1287
1288// Get an AVFrame (if any)
1289bool FFmpegReader::GetAVFrame() {
1290 int frameFinished = 0;
1291
1292 // Decode video frame
1293 AVFrame *next_frame = AV_ALLOCATE_FRAME();
1294
1295#if IS_FFMPEG_3_2
1296 int send_packet_err = 0;
1297 int64_t send_packet_pts = 0;
1298 if ((packet && packet->stream_index == videoStream) || !packet) {
1299 send_packet_err = avcodec_send_packet(pCodecCtx, packet);
1300
1301 if (packet && send_packet_err >= 0) {
1302 send_packet_pts = GetPacketPTS();
1303 hold_packet = false;
1304 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet succeeded)", "send_packet_err", send_packet_err, "send_packet_pts", send_packet_pts);
1305 }
1306 }
1307
1308 #if USE_HW_ACCEL
1309 // Get the format from the variables set in get_hw_dec_format
1310 hw_de_av_pix_fmt = hw_de_av_pix_fmt_global;
1311 hw_de_av_device_type = hw_de_av_device_type_global;
1312 #endif // USE_HW_ACCEL
1313 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1314 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) + "])", "send_packet_err", send_packet_err, "send_packet_pts", send_packet_pts);
1315 if (send_packet_err == AVERROR(EAGAIN)) {
1316 hold_packet = true;
1317 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()", "send_packet_pts", send_packet_pts);
1318 }
1319 if (send_packet_err == AVERROR(EINVAL)) {
1320 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush", "send_packet_pts", send_packet_pts);
1321 }
1322 if (send_packet_err == AVERROR(ENOMEM)) {
1323 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors", "send_packet_pts", send_packet_pts);
1324 }
1325 }
1326
1327 // Always try and receive a packet, if not EOF.
1328 // Even if the above avcodec_send_packet failed to send,
1329 // we might still need to receive a packet.
1330 int receive_frame_err = 0;
1331 AVFrame *next_frame2;
1332#if USE_HW_ACCEL
1333 if (hw_de_on && hw_de_supported) {
1334 next_frame2 = AV_ALLOCATE_FRAME();
1335 }
1336 else
1337#endif // USE_HW_ACCEL
1338 {
1339 next_frame2 = next_frame;
1340 }
1341 pFrame = AV_ALLOCATE_FRAME();
1342 while (receive_frame_err >= 0) {
1343 receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
1344
1345 if (receive_frame_err != 0) {
1346 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [\" + av_err2string(receive_frame_err) + \"])", "receive_frame_err", receive_frame_err, "send_packet_pts", send_packet_pts);
1347
1348 if (receive_frame_err == AVERROR_EOF) {
1350 "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)", "send_packet_pts", send_packet_pts);
1351 avcodec_flush_buffers(pCodecCtx);
1352 packet_status.video_eof = true;
1353 }
1354 if (receive_frame_err == AVERROR(EINVAL)) {
1356 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)", "send_packet_pts", send_packet_pts);
1357 avcodec_flush_buffers(pCodecCtx);
1358 }
1359 if (receive_frame_err == AVERROR(EAGAIN)) {
1361 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)", "send_packet_pts", send_packet_pts);
1362 }
1363 if (receive_frame_err == AVERROR_INPUT_CHANGED) {
1365 "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)", "send_packet_pts", send_packet_pts);
1366 }
1367
1368 // Break out of decoding loop
1369 // Nothing ready for decoding yet
1370 break;
1371 }
1372
1373#if USE_HW_ACCEL
1374 if (hw_de_on && hw_de_supported) {
1375 int err;
1376 if (next_frame2->format == hw_de_av_pix_fmt) {
1377 next_frame->format = AV_PIX_FMT_YUV420P;
1378 if ((err = av_hwframe_transfer_data(next_frame,next_frame2,0)) < 0) {
1379 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (Failed to transfer data to output frame)", "hw_de_on", hw_de_on);
1380 }
1381 if ((err = av_frame_copy_props(next_frame,next_frame2)) < 0) {
1382 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (Failed to copy props to output frame)", "hw_de_on", hw_de_on);
1383 }
1384 }
1385 }
1386 else
1387#endif // USE_HW_ACCEL
1388 { // No hardware acceleration used -> no copy from GPU memory needed
1389 next_frame = next_frame2;
1390 }
1391
1392 // TODO also handle possible further frames
1393 // Use only the first frame like avcodec_decode_video2
1394 frameFinished = 1;
1395 packet_status.video_decoded++;
1396
1397 // Allocate image (align 32 for simd)
1398 if (AV_ALLOCATE_IMAGE(pFrame, (AVPixelFormat)(pStream->codecpar->format), info.width, info.height) <= 0) {
1399 throw OutOfMemory("Failed to allocate image buffer", path);
1400 }
1401 av_image_copy(pFrame->data, pFrame->linesize, (const uint8_t**)next_frame->data, next_frame->linesize,
1402 (AVPixelFormat)(pStream->codecpar->format), info.width, info.height);
1403
1404 // Get display PTS from video frame, often different than packet->pts.
1405 // Sending packets to the decoder (i.e. packet->pts) is async,
1406 // and retrieving packets from the decoder (frame->pts) is async. In most decoders
1407 // sending and retrieving are separated by multiple calls to this method.
1408 if (next_frame->pts != AV_NOPTS_VALUE) {
1409 // This is the current decoded frame (and should be the pts used) for
1410 // processing this data
1411 video_pts = next_frame->pts;
1412 } else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
1413 // Some videos only set this timestamp (fallback)
1414 video_pts = next_frame->pkt_dts;
1415 }
1416
1418 "FFmpegReader::GetAVFrame (Successful frame received)", "video_pts", video_pts, "send_packet_pts", send_packet_pts);
1419
1420 // break out of loop after each successful image returned
1421 break;
1422 }
1423#if USE_HW_ACCEL
1424 if (hw_de_on && hw_de_supported) {
1425 AV_FREE_FRAME(&next_frame2);
1426 }
1427 #endif // USE_HW_ACCEL
1428#else
1429 avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
1430
1431 // always allocate pFrame (because we do that in the ffmpeg >= 3.2 as well); it will always be freed later
1432 pFrame = AV_ALLOCATE_FRAME();
1433
1434 // is frame finished
1435 if (frameFinished) {
1436 // AVFrames are clobbered on the each call to avcodec_decode_video, so we
1437 // must make a copy of the image data before this method is called again.
1438 avpicture_alloc((AVPicture *) pFrame, pCodecCtx->pix_fmt, info.width, info.height);
1439 av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt, info.width,
1440 info.height);
1441 }
1442#endif // IS_FFMPEG_3_2
1443
1444 // deallocate the frame
1445 AV_FREE_FRAME(&next_frame);
1446
1447 // Did we get a video frame?
1448 return frameFinished;
1449}
1450
1451// Check the current seek position and determine if we need to seek again
1452bool FFmpegReader::CheckSeek(bool is_video) {
1453 // Are we seeking for a specific frame?
1454 if (is_seeking) {
1455 // Determine if both an audio and video packet have been decoded since the seek happened.
1456 // If not, allow the ReadStream method to keep looping
1457 if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
1458 return false;
1459
1460 // Check for both streams
1461 if ((info.has_video && !seek_video_frame_found) || (info.has_audio && !seek_audio_frame_found))
1462 return false;
1463
1464 // Determine max seeked frame
1465 int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);
1466
1467 // determine if we are "before" the requested frame
1468 if (max_seeked_frame >= seeking_frame) {
1469 // SEEKED TOO FAR
1470 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckSeek (Too far, seek again)",
1471 "is_video_seek", is_video_seek,
1472 "max_seeked_frame", max_seeked_frame,
1473 "seeking_frame", seeking_frame,
1474 "seeking_pts", seeking_pts,
1475 "seek_video_frame_found", seek_video_frame_found,
1476 "seek_audio_frame_found", seek_audio_frame_found);
1477
1478 // Seek again... to the nearest Keyframe
1479 Seek(seeking_frame - (10 * seek_count * seek_count));
1480 } else {
1481 // SEEK WORKED
1482 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckSeek (Successful)",
1483 "is_video_seek", is_video_seek,
1484 "packet->pts", GetPacketPTS(),
1485 "seeking_pts", seeking_pts,
1486 "seeking_frame", seeking_frame,
1487 "seek_video_frame_found", seek_video_frame_found,
1488 "seek_audio_frame_found", seek_audio_frame_found);
1489
1490 // Seek worked, and we are "before" the requested frame
1491 is_seeking = false;
1492 seeking_frame = 0;
1493 seeking_pts = -1;
1494 }
1495 }
1496
1497 // return the pts to seek to (if any)
1498 return is_seeking;
1499}
1500
// Process a video packet
void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
	// Decode one video frame from the current packet, convert/scale it to RGBA,
	// and add the resulting image to the matching Frame in working_cache.

	// Get the AVFrame from the current packet
	// This sets the video_pts to the correct timestamp
	int frame_finished = GetAVFrame();

	// Check if the AVFrame is finished and set it
	if (!frame_finished) {
		// No AVFrame decoded yet, bail out
		if (pFrame) {
			RemoveAVFrame(pFrame);
		}
		return;
	}

	// Calculate current frame # from the decoded frame's display timestamp
	int64_t current_frame = ConvertVideoPTStoFrame(video_pts);

	// Track 1st video packet after a successful seek
	if (!seek_video_frame_found && is_seeking)
		seek_video_frame_found = current_frame;

	// Create or get the existing frame object. Requested frame needs to be created
	// in working_cache at least once. Seek can clear the working_cache, so we must
	// add the requested frame back to the working_cache here. If it already exists,
	// it will be moved to the top of the working_cache.
	working_cache.Add(CreateFrame(requested_frame));

	// Debug output
	ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessVideoPacket (Before)", "requested_frame", requested_frame, "current_frame", current_frame);

	// Init some things local (for OpenMP)
	PixelFormat pix_fmt = AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx);
	int height = info.height;
	int width = info.width;
	int64_t video_length = info.video_length;

	// Create or reuse a RGB Frame (since most videos are not in RGB, we must convert it)
	AVFrame *pFrameRGB = pFrameRGB_cached;
	if (!pFrameRGB) {
		pFrameRGB = AV_ALLOCATE_FRAME();
		if (pFrameRGB == nullptr)
			throw OutOfMemory("Failed to allocate frame buffer", path);
		// Cache the allocated frame for reuse on subsequent calls
		pFrameRGB_cached = pFrameRGB;
	}
	AV_RESET_FRAME(pFrameRGB);
	uint8_t *buffer = nullptr;

	// Determine the max size of this source image (based on the timeline's size, the scaling mode,
	// and the scaling keyframes). This is a performance improvement, to keep the images as small as possible,
	// without losing quality. NOTE: We cannot go smaller than the timeline itself, or the add_layer timeline
	// method will scale it back to timeline size before scaling it smaller again. This needs to be fixed in
	// the future.
	int max_width = info.width;
	int max_height = info.height;

	Clip *parent = static_cast<Clip *>(ParentClip());
	if (parent) {
		if (parent->ParentTimeline()) {
			// Set max width/height based on parent clip's timeline (if attached to a timeline)
			max_width = parent->ParentTimeline()->preview_width;
			max_height = parent->ParentTimeline()->preview_height;
		}
		if (parent->scale == SCALE_FIT || parent->scale == SCALE_STRETCH) {
			// Best fit or Stretch scaling (based on max timeline size * scaling keyframes)
			float max_scale_x = parent->scale_x.GetMaxPoint().co.Y;
			float max_scale_y = parent->scale_y.GetMaxPoint().co.Y;
			// Only enlarge beyond the timeline size when the keyframe scale exceeds 1.0
			max_width = std::max(float(max_width), max_width * max_scale_x);
			max_height = std::max(float(max_height), max_height * max_scale_y);

		} else if (parent->scale == SCALE_CROP) {
			// Cropping scale mode (based on max timeline size * cropped size * scaling keyframes)
			float max_scale_x = parent->scale_x.GetMaxPoint().co.Y;
			float max_scale_y = parent->scale_y.GetMaxPoint().co.Y;
			QSize width_size(max_width * max_scale_x,
							 round(max_width / (float(info.width) / float(info.height))));
			QSize height_size(round(max_height / (float(info.height) / float(info.width))),
							  max_height * max_scale_y);
			// respect aspect ratio: choose whichever candidate covers the full preview area
			if (width_size.width() >= max_width && width_size.height() >= max_height) {
				max_width = std::max(max_width, width_size.width());
				max_height = std::max(max_height, width_size.height());
			} else {
				max_width = std::max(max_width, height_size.width());
				max_height = std::max(max_height, height_size.height());
			}

		} else {
			// Scale video to equivalent unscaled size
			// Since the preview window can change sizes, we want to always
			// scale against the ratio of original video size to timeline size
			float preview_ratio = 1.0;
			if (parent->ParentTimeline()) {
				Timeline *t = (Timeline *) parent->ParentTimeline();
				preview_ratio = t->preview_width / float(t->info.width);
			}
			float max_scale_x = parent->scale_x.GetMaxPoint().co.Y;
			float max_scale_y = parent->scale_y.GetMaxPoint().co.Y;
			max_width = info.width * max_scale_x * preview_ratio;
			max_height = info.height * max_scale_y * preview_ratio;
		}

		// If a crop effect is resizing the image, request enough pixels to preserve detail
		ApplyCropResizeScale(parent, info.width, info.height, max_width, max_height);
	}

	// Determine if image needs to be scaled (for performance reasons)
	int original_height = height;
	if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
		// Override width and height (but maintain aspect ratio)
		float ratio = float(width) / float(height);
		int possible_width = round(max_height * ratio);
		int possible_height = round(max_width / ratio);

		if (possible_width <= max_width) {
			// use calculated width, and max_height
			width = possible_width;
			height = max_height;
		} else {
			// use max_width, and calculated height
			width = max_width;
			height = possible_height;
		}
	}

	// Determine required buffer size and allocate buffer
	const int bytes_per_pixel = 4;
	// +128 slack bytes, presumably for sws_scale overreads — TODO confirm
	int raw_buffer_size = (width * height * bytes_per_pixel) + 128;

	// Aligned memory allocation (for speed)
	constexpr size_t ALIGNMENT = 32; // AVX2
	int buffer_size = ((raw_buffer_size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT;
	// NOTE(review): the aligned_malloc result is not checked for nullptr before
	// use below — confirm whether an OutOfMemory throw is wanted here.
	buffer = (unsigned char*) aligned_malloc(buffer_size, ALIGNMENT);

	// Copy picture data from one AVFrame (or AVPicture) to another one.
	AV_COPY_PICTURE_DATA(pFrameRGB, buffer, PIX_FMT_RGBA, width, height);

	// Choose the scaling algorithm (quality vs speed) based on global settings
	int scale_mode = SWS_FAST_BILINEAR;
	if (openshot::Settings::Instance()->HIGH_QUALITY_SCALING) {
		scale_mode = SWS_BICUBIC;
	}
	// Reuse (or lazily rebuild) the cached swscale context for this conversion
	img_convert_ctx = sws_getCachedContext(img_convert_ctx, info.width, info.height, AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx), width, height, PIX_FMT_RGBA, scale_mode, NULL, NULL, NULL);
	if (!img_convert_ctx)
		throw OutOfMemory("Failed to initialize sws context", path);

	// Resize / Convert to RGB
	sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
			  original_height, pFrameRGB->data, pFrameRGB->linesize);

	// Create or get the existing frame object
	std::shared_ptr<Frame> f = CreateFrame(current_frame);

	// Add Image data to frame
	// NOTE(review): 'buffer' is handed to AddImage; presumably the Frame takes
	// ownership and frees it — verify, since it is not freed in this method.
	if (!ffmpeg_has_alpha(AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx))) {
		// Add image with no alpha channel, Speed optimization
		f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
	} else {
		// Add image with alpha channel (this will be converted to premultipled when needed, but is slower)
		f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
	}

	// Update working cache
	working_cache.Add(f);

	// Keep track of last last_video_frame
	last_video_frame = f;

	// Free the RGB image
	AV_RESET_FRAME(pFrameRGB);

	// Remove frame and packet
	RemoveAVFrame(pFrame);

	// Get video PTS in seconds
	video_pts_seconds = (double(video_pts) * info.video_timebase.ToDouble()) + pts_offset_seconds;

	// Debug output
	ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessVideoPacket (After)", "requested_frame", requested_frame, "current_frame", current_frame, "f->number", f->number, "video_pts_seconds", video_pts_seconds);
}
1680
1681// Process an audio packet
1682void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
1683 AudioLocation location;
1684 // Calculate location of current audio packet
1685 if (packet && packet->pts != AV_NOPTS_VALUE) {
1686 // Determine related video frame and starting sample # from audio PTS
1687 location = GetAudioPTSLocation(packet->pts);
1688
1689 // Track 1st audio packet after a successful seek
1690 if (!seek_audio_frame_found && is_seeking)
1691 seek_audio_frame_found = location.frame;
1692 }
1693
1694 // Create or get the existing frame object. Requested frame needs to be created
1695 // in working_cache at least once. Seek can clear the working_cache, so we must
1696 // add the requested frame back to the working_cache here. If it already exists,
1697 // it will be moved to the top of the working_cache.
1698 working_cache.Add(CreateFrame(requested_frame));
1699
1700 // Debug output
1701 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (Before)",
1702 "requested_frame", requested_frame,
1703 "target_frame", location.frame,
1704 "starting_sample", location.sample_start);
1705
1706 // Init an AVFrame to hold the decoded audio samples
1707 int frame_finished = 0;
1708 AVFrame *audio_frame = AV_ALLOCATE_FRAME();
1709 AV_RESET_FRAME(audio_frame);
1710
1711 int packet_samples = 0;
1712 int data_size = 0;
1713
1714#if IS_FFMPEG_3_2
1715 int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
1716 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1717 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (Packet not sent)");
1718 }
1719 else {
1720 int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
1721 if (receive_frame_err >= 0) {
1722 frame_finished = 1;
1723 }
1724 if (receive_frame_err == AVERROR_EOF) {
1725 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (EOF detected from decoder)");
1726 packet_status.audio_eof = true;
1727 }
1728 if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
1729 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (invalid frame received or EOF from decoder)");
1730 avcodec_flush_buffers(aCodecCtx);
1731 }
1732 if (receive_frame_err != 0) {
1733 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (frame not ready yet from decoder)");
1734 }
1735 }
1736#else
1737 int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
1738#endif
1739
1740 if (frame_finished) {
1741 packet_status.audio_decoded++;
1742
1743 // This can be different than the current packet, so we need to look
1744 // at the current AVFrame from the audio decoder. This timestamp should
1745 // be used for the remainder of this function
1746 audio_pts = audio_frame->pts;
1747
1748 // Determine related video frame and starting sample # from audio PTS
1749 location = GetAudioPTSLocation(audio_pts);
1750
1751 // determine how many samples were decoded
1752 int plane_size = -1;
1753#if HAVE_CH_LAYOUT
1754 int nb_channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
1755#else
1756 int nb_channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels;
1757#endif
1758 data_size = av_samples_get_buffer_size(&plane_size, nb_channels,
1759 audio_frame->nb_samples, (AVSampleFormat) (AV_GET_SAMPLE_FORMAT(aStream, aCodecCtx)), 1);
1760
1761 // Calculate total number of samples
1762 packet_samples = audio_frame->nb_samples * nb_channels;
1763 } else {
1764 if (audio_frame) {
1765 // Free audio frame
1766 AV_FREE_FRAME(&audio_frame);
1767 }
1768 }
1769
1770 // Estimate the # of samples and the end of this packet's location (to prevent GAPS for the next timestamp)
1771 int pts_remaining_samples = packet_samples / info.channels; // Adjust for zero based array
1772
1773 // Bail if no samples found
1774 if (pts_remaining_samples == 0) {
1775 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (No samples, bailing)",
1776 "packet_samples", packet_samples,
1777 "info.channels", info.channels,
1778 "pts_remaining_samples", pts_remaining_samples);
1779 return;
1780 }
1781
1782 while (pts_remaining_samples) {
1783 // Get Samples per frame (for this frame number)
1784 int samples_per_frame = Frame::GetSamplesPerFrame(previous_packet_location.frame, info.fps, info.sample_rate, info.channels);
1785
1786 // Calculate # of samples to add to this frame
1787 int samples = samples_per_frame - previous_packet_location.sample_start;
1788 if (samples > pts_remaining_samples)
1789 samples = pts_remaining_samples;
1790
1791 // Decrement remaining samples
1792 pts_remaining_samples -= samples;
1793
1794 if (pts_remaining_samples > 0) {
1795 // next frame
1796 previous_packet_location.frame++;
1797 previous_packet_location.sample_start = 0;
1798 } else {
1799 // Increment sample start
1800 previous_packet_location.sample_start += samples;
1801 }
1802 }
1803
1804 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (ReSample)",
1805 "packet_samples", packet_samples,
1806 "info.channels", info.channels,
1807 "info.sample_rate", info.sample_rate,
1808 "aCodecCtx->sample_fmt", AV_GET_SAMPLE_FORMAT(aStream, aCodecCtx));
1809
1810 // Create output frame
1811 AVFrame *audio_converted = AV_ALLOCATE_FRAME();
1812 AV_RESET_FRAME(audio_converted);
1813 audio_converted->nb_samples = audio_frame->nb_samples;
1814 av_samples_alloc(audio_converted->data, audio_converted->linesize, info.channels, audio_frame->nb_samples, AV_SAMPLE_FMT_FLTP, 0);
1815
1816 SWRCONTEXT *avr = avr_ctx;
1817 // setup resample context if needed
1818 if (!avr) {
1819 avr = SWR_ALLOC();
1820#if HAVE_CH_LAYOUT
1821 av_opt_set_chlayout(avr, "in_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
1822 av_opt_set_chlayout(avr, "out_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
1823#else
1824 av_opt_set_int(avr, "in_channel_layout", AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout, 0);
1825 av_opt_set_int(avr, "out_channel_layout", AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout, 0);
1826 av_opt_set_int(avr, "in_channels", info.channels, 0);
1827 av_opt_set_int(avr, "out_channels", info.channels, 0);
1828#endif
1829 av_opt_set_int(avr, "in_sample_fmt", AV_GET_SAMPLE_FORMAT(aStream, aCodecCtx), 0);
1830 av_opt_set_int(avr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
1831 av_opt_set_int(avr, "in_sample_rate", info.sample_rate, 0);
1832 av_opt_set_int(avr, "out_sample_rate", info.sample_rate, 0);
1833 SWR_INIT(avr);
1834 avr_ctx = avr;
1835 }
1836
1837 // Convert audio samples
1838 int nb_samples = SWR_CONVERT(avr, // audio resample context
1839 audio_converted->data, // output data pointers
1840 audio_converted->linesize[0], // output plane size, in bytes. (0 if unknown)
1841 audio_converted->nb_samples, // maximum number of samples that the output buffer can hold
1842 audio_frame->data, // input data pointers
1843 audio_frame->linesize[0], // input plane size, in bytes (0 if unknown)
1844 audio_frame->nb_samples); // number of input samples to convert
1845
1846
1847 int64_t starting_frame_number = -1;
1848 for (int channel_filter = 0; channel_filter < info.channels; channel_filter++) {
1849 // Array of floats (to hold samples for each channel)
1850 starting_frame_number = location.frame;
1851 int channel_buffer_size = nb_samples;
1852 auto *channel_buffer = (float *) (audio_converted->data[channel_filter]);
1853
1854 // Loop through samples, and add them to the correct frames
1855 int start = location.sample_start;
1856 int remaining_samples = channel_buffer_size;
1857 while (remaining_samples > 0) {
1858 // Get Samples per frame (for this frame number)
1859 int samples_per_frame = Frame::GetSamplesPerFrame(starting_frame_number, info.fps, info.sample_rate, info.channels);
1860
1861 // Calculate # of samples to add to this frame
1862 int samples = std::fmin(samples_per_frame - start, remaining_samples);
1863
1864 // Create or get the existing frame object
1865 std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);
1866
1867 // Add samples for current channel to the frame.
1868 f->AddAudio(true, channel_filter, start, channel_buffer, samples, 1.0f);
1869
1870 // Debug output
1871 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (f->AddAudio)",
1872 "frame", starting_frame_number,
1873 "start", start,
1874 "samples", samples,
1875 "channel", channel_filter,
1876 "samples_per_frame", samples_per_frame);
1877
1878 // Add or update cache
1879 working_cache.Add(f);
1880
1881 // Decrement remaining samples
1882 remaining_samples -= samples;
1883
1884 // Increment buffer (to next set of samples)
1885 if (remaining_samples > 0)
1886 channel_buffer += samples;
1887
1888 // Increment frame number
1889 starting_frame_number++;
1890
1891 // Reset starting sample #
1892 start = 0;
1893 }
1894 }
1895
1896 // Free AVFrames
1897 av_free(audio_converted->data[0]);
1898 AV_FREE_FRAME(&audio_converted);
1899 AV_FREE_FRAME(&audio_frame);
1900
1901 // Get audio PTS in seconds
1902 audio_pts_seconds = (double(audio_pts) * info.audio_timebase.ToDouble()) + pts_offset_seconds;
1903
1904 // Debug output
1905 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (After)",
1906 "requested_frame", requested_frame,
1907 "starting_frame", location.frame,
1908 "end_frame", starting_frame_number - 1,
1909 "audio_pts_seconds", audio_pts_seconds);
1910
1911}
1912
1913
// Seek to a specific frame. This is not always frame accurate, it's more of an estimation on many codecs.
// Two strategies: near the start of the file we simply Close()/Open() (more reliable than seeking);
// otherwise we av_seek_frame() to the nearest key-frame before the target.
void FFmpegReader::Seek(int64_t requested_frame) {
	// Adjust for a requested frame that is too small or too large
	if (requested_frame < 1)
		requested_frame = 1;
	if (requested_frame > info.video_length)
		requested_frame = info.video_length;
	if (requested_frame > largest_frame_processed && packet_status.end_of_file) {
		// Not possible to search past largest_frame once EOF is reached (no more packets)
		return;
	}

	// Debug output
	ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::Seek",
											 "requested_frame", requested_frame,
											 "seek_count", seek_count,
											 "last_frame", last_frame);

	// Clear working cache (since we are seeking to another location in the file)
	working_cache.Clear();

	// Reset the last frame variable (and all other decode-position state)
	video_pts = 0.0;
	video_pts_seconds = NO_PTS_OFFSET;
	audio_pts = 0.0;
	audio_pts_seconds = NO_PTS_OFFSET;
	hold_packet = false;
	last_frame = 0;
	current_video_frame = 0;
	largest_frame_processed = 0;
	// Remember stream availability, since Close()/Open() below may re-detect these flags
	bool has_audio_override = info.has_audio;
	bool has_video_override = info.has_video;

	// Init end-of-file detection variables
	packet_status.reset(false);

	// Increment seek count
	seek_count++;

	// If seeking near frame 1, we need to close and re-open the file (this is more reliable than seeking)
	// buffer_amount: # of frames to land before the target, so the decoder can warm up
	int buffer_amount = 12;
	if (requested_frame - buffer_amount < 20) {
		// prevent Open() from seeking again
		is_seeking = true;

		// Close and re-open file (basically seeking to frame 1)
		Close();
		Open();

		// Update overrides (since closing and re-opening might update these)
		info.has_audio = has_audio_override;
		info.has_video = has_video_override;

		// Not actually seeking, so clear these flags
		is_seeking = false;
		if (seek_count == 1) {
			// Don't redefine this on multiple seek attempts for a specific frame
			seeking_frame = 1;
			seeking_pts = ConvertFrameToVideoPTS(1);
		}
		seek_audio_frame_found = 0; // used to detect which frames to throw away after a seek
		seek_video_frame_found = 0; // used to detect which frames to throw away after a seek

	} else {
		// Seek to nearest key-frame (aka, i-frame)
		bool seek_worked = false;
		int64_t seek_target = 0;

		// Seek video stream (if any), except album arts
		if (!seek_worked && info.has_video && !HasAlbumArt()) {
			seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
			if (av_seek_frame(pFormatCtx, info.video_stream_index, seek_target, AVSEEK_FLAG_BACKWARD) < 0) {
				fprintf(stderr, "%s: error while seeking video stream\n", pFormatCtx->AV_FILENAME);
			} else {
				// VIDEO SEEK
				is_video_seek = true;
				seek_worked = true;
			}
		}

		// Seek audio stream (if not already seeked... and if an audio stream is found)
		if (!seek_worked && info.has_audio) {
			seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
			if (av_seek_frame(pFormatCtx, info.audio_stream_index, seek_target, AVSEEK_FLAG_BACKWARD) < 0) {
				fprintf(stderr, "%s: error while seeking audio stream\n", pFormatCtx->AV_FILENAME);
			} else {
				// AUDIO SEEK
				is_video_seek = false;
				seek_worked = true;
			}
		}

		// Was the seek successful?
		if (seek_worked) {
			// Flush audio buffer (discard any packets buffered before the seek)
			if (info.has_audio)
				avcodec_flush_buffers(aCodecCtx);

			// Flush video buffer
			if (info.has_video)
				avcodec_flush_buffers(pCodecCtx);

			// Reset previous audio location to zero (-1 frame means "no previous packet")
			previous_packet_location.frame = -1;
			previous_packet_location.sample_start = 0;

			// init seek flags
			is_seeking = true;
			if (seek_count == 1) {
				// Don't redefine this on multiple seek attempts for a specific frame
				seeking_pts = seek_target;
				seeking_frame = requested_frame;
			}
			seek_audio_frame_found = 0; // used to detect which frames to throw away after a seek
			seek_video_frame_found = 0; // used to detect which frames to throw away after a seek

		} else {
			// seek failed
			seeking_pts = 0;
			seeking_frame = 0;

			// prevent Open() from seeking again
			is_seeking = true;

			// Close and re-open file (basically seeking to frame 1)
			Close();
			Open();

			// Not actually seeking, so clear these flags
			is_seeking = false;

			// disable seeking for this reader (since it failed)
			enable_seek = false;

			// Update overrides (since closing and re-opening might update these)
			info.has_audio = has_audio_override;
			info.has_video = has_video_override;
		}
	}
}
2054
2055// Get the PTS for the current video packet
2056int64_t FFmpegReader::GetPacketPTS() {
2057 if (packet) {
2058 int64_t current_pts = packet->pts;
2059 if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
2060 current_pts = packet->dts;
2061
2062 // Return adjusted PTS
2063 return current_pts;
2064 } else {
2065 // No packet, return NO PTS
2066 return AV_NOPTS_VALUE;
2067 }
2068}
2069
2070// Update PTS Offset (if any)
2071void FFmpegReader::UpdatePTSOffset() {
2072 if (pts_offset_seconds != NO_PTS_OFFSET) {
2073 // Skip this method if we have already set PTS offset
2074 return;
2075 }
2076 pts_offset_seconds = 0.0;
2077 double video_pts_offset_seconds = 0.0;
2078 double audio_pts_offset_seconds = 0.0;
2079
2080 bool has_video_pts = false;
2081 if (!info.has_video) {
2082 // Mark as checked
2083 has_video_pts = true;
2084 }
2085 bool has_audio_pts = false;
2086 if (!info.has_audio) {
2087 // Mark as checked
2088 has_audio_pts = true;
2089 }
2090
2091 // Loop through the stream (until a packet from all streams is found)
2092 while (!has_video_pts || !has_audio_pts) {
2093 // Get the next packet (if any)
2094 if (GetNextPacket() < 0)
2095 // Break loop when no more packets found
2096 break;
2097
2098 // Get PTS of this packet
2099 int64_t pts = GetPacketPTS();
2100
2101 // Video packet
2102 if (!has_video_pts && packet->stream_index == videoStream) {
2103 // Get the video packet start time (in seconds)
2104 video_pts_offset_seconds = 0.0 - (video_pts * info.video_timebase.ToDouble());
2105
2106 // Is timestamp close to zero (within X seconds)
2107 // Ignore wildly invalid timestamps (i.e. -234923423423)
2108 if (std::abs(video_pts_offset_seconds) <= 10.0) {
2109 has_video_pts = true;
2110 }
2111 }
2112 else if (!has_audio_pts && packet->stream_index == audioStream) {
2113 // Get the audio packet start time (in seconds)
2114 audio_pts_offset_seconds = 0.0 - (pts * info.audio_timebase.ToDouble());
2115
2116 // Is timestamp close to zero (within X seconds)
2117 // Ignore wildly invalid timestamps (i.e. -234923423423)
2118 if (std::abs(audio_pts_offset_seconds) <= 10.0) {
2119 has_audio_pts = true;
2120 }
2121 }
2122 }
2123
2124 // Do we have all valid timestamps to determine PTS offset?
2125 if (has_video_pts && has_audio_pts) {
2126 // Set PTS Offset to the smallest offset
2127 // [ video timestamp ]
2128 // [ audio timestamp ]
2129 //
2130 // ** SHIFT TIMESTAMPS TO ZERO **
2131 //
2132 //[ video timestamp ]
2133 // [ audio timestamp ]
2134 //
2135 // Since all offsets are negative at this point, we want the max value, which
2136 // represents the closest to zero
2137 pts_offset_seconds = std::max(video_pts_offset_seconds, audio_pts_offset_seconds);
2138 }
2139}
2140
2141// Convert PTS into Frame Number
2142int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
2143 // Apply PTS offset
2144 int64_t previous_video_frame = current_video_frame;
2145
2146 // Get the video packet start time (in seconds)
2147 double video_seconds = (double(pts) * info.video_timebase.ToDouble()) + pts_offset_seconds;
2148
2149 // Divide by the video timebase, to get the video frame number (frame # is decimal at this point)
2150 int64_t frame = round(video_seconds * info.fps.ToDouble()) + 1;
2151
2152 // Keep track of the expected video frame #
2153 if (current_video_frame == 0)
2154 current_video_frame = frame;
2155 else {
2156
2157 // Sometimes frames are duplicated due to identical (or similar) timestamps
2158 if (frame == previous_video_frame) {
2159 // return -1 frame number
2160 frame = -1;
2161 } else {
2162 // Increment expected frame
2163 current_video_frame++;
2164 }
2165 }
2166
2167 // Return frame #
2168 return frame;
2169}
2170
2171// Convert Frame Number into Video PTS
2172int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
2173 // Get timestamp of this frame (in seconds)
2174 double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
2175
2176 // Calculate the # of video packets in this timestamp
2177 int64_t video_pts = round(seconds / info.video_timebase.ToDouble());
2178
2179 // Apply PTS offset (opposite)
2180 return video_pts;
2181}
2182
2183// Convert Frame Number into Video PTS
2184int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
2185 // Get timestamp of this frame (in seconds)
2186 double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
2187
2188 // Calculate the # of audio packets in this timestamp
2189 int64_t audio_pts = round(seconds / info.audio_timebase.ToDouble());
2190
2191 // Apply PTS offset (opposite)
2192 return audio_pts;
2193}
2194
// Calculate Starting video frame and sample # for an audio PTS.
// Maps an audio timestamp onto (video frame #, starting sample #) and snaps
// small discontinuities to the previous packet's end to avoid audible gaps.
AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
	// Get the audio packet start time (in seconds)
	double audio_seconds = (double(pts) * info.audio_timebase.ToDouble()) + pts_offset_seconds;

	// Divide by the video timebase, to get the video frame number (frame # is decimal at this point)
	double frame = (audio_seconds * info.fps.ToDouble()) + 1;

	// Frame # as a whole number (no more decimals)
	int64_t whole_frame = int64_t(frame);

	// Remove the whole number, and only get the decimal of the frame
	double sample_start_percentage = frame - double(whole_frame);

	// Get Samples per frame
	int samples_per_frame = Frame::GetSamplesPerFrame(whole_frame, info.fps, info.sample_rate, info.channels);

	// Calculate the sample # to start on (fraction of a frame -> sample offset)
	int sample_start = round(double(samples_per_frame) * sample_start_percentage);

	// Protect against broken (i.e. negative) timestamps
	if (whole_frame < 1)
		whole_frame = 1;
	if (sample_start < 0)
		sample_start = 0;

	// Prepare final audio packet location
	AudioLocation location = {whole_frame, sample_start};

	// Compare to previous audio packet (and fix small gaps due to varying PTS timestamps).
	// previous_packet_location.frame == -1 means "no prior packet" (e.g. right after a seek).
	if (previous_packet_location.frame != -1) {
		if (location.is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
			int64_t orig_frame = location.frame;
			int orig_start = location.sample_start;

			// Update sample start, to prevent gaps in audio
			// (snap this packet to exactly where the previous one ended)
			location.sample_start = previous_packet_location.sample_start;
			location.frame = previous_packet_location.frame;

			// Debug output
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)", "Source Frame", orig_frame, "Source Audio Sample", orig_start, "Target Frame", location.frame, "Target Audio Sample", location.sample_start, "pts", pts);

		} else {
			// Gap is too large to be timestamp jitter; keep the computed location
			// Debug output
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAudioPTSLocation (Audio Gap Ignored - too big)", "Previous location frame", previous_packet_location.frame, "Target Frame", location.frame, "Target Audio Sample", location.sample_start, "pts", pts);
		}
	}

	// Set previous location
	previous_packet_location = location;

	// Return the associated video frame and starting sample #
	return location;
}
2249
2250// Create a new Frame (or return an existing one) and add it to the working queue.
2251std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
2252 // Check working cache
2253 std::shared_ptr<Frame> output = working_cache.GetFrame(requested_frame);
2254
2255 if (!output) {
2256 // (re-)Check working cache
2257 output = working_cache.GetFrame(requested_frame);
2258 if(output) return output;
2259
2260 // Create a new frame on the working cache
2261 output = std::make_shared<Frame>(requested_frame, info.width, info.height, "#000000", Frame::GetSamplesPerFrame(requested_frame, info.fps, info.sample_rate, info.channels), info.channels);
2262 output->SetPixelRatio(info.pixel_ratio.num, info.pixel_ratio.den); // update pixel ratio
2263 output->ChannelsLayout(info.channel_layout); // update audio channel layout from the parent reader
2264 output->SampleRate(info.sample_rate); // update the frame's sample rate of the parent reader
2265
2266 working_cache.Add(output);
2267
2268 // Set the largest processed frame (if this is larger)
2269 if (requested_frame > largest_frame_processed)
2270 largest_frame_processed = requested_frame;
2271 }
2272 // Return frame
2273 return output;
2274}
2275
2276// Determine if frame is partial due to seek
2277bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
2278
2279 // Sometimes a seek gets partial frames, and we need to remove them
2280 bool seek_trash = false;
2281 int64_t max_seeked_frame = seek_audio_frame_found; // determine max seeked frame
2282 if (seek_video_frame_found > max_seeked_frame) {
2283 max_seeked_frame = seek_video_frame_found;
2284 }
2285 if ((info.has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
2286 (info.has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
2287 seek_trash = true;
2288 }
2289
2290 return seek_trash;
2291}
2292
// Check the working queue, and move finished frames to the finished queue.
// A frame is "finished" when both its video image and its audio samples are
// complete (judged by comparing the frame's timestamp to the most recently
// decoded video/audio timestamps), or when EOF makes further data impossible.
void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {

	// Prevent async calls to the following code
	const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

	// Get a list of current working queue frames in the cache (in-progress frames)
	std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.GetFrames();
	std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;

	// Loop through all working queue frames (sorted by frame #)
	for(working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
	{
		// Get working frame
		std::shared_ptr<Frame> f = *working_itr;

		// Was a frame found? Is frame requested yet?
		if (!f || f->number > requested_frame) {
			// If not, skip to next one
			continue;
		}

		// Calculate PTS in seconds (of working frame), and the most recent processed pts value
		double frame_pts_seconds = (double(f->number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
		double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);

		// Determine if video and audio are ready (based on timestamps)
		bool is_video_ready = false;
		bool is_audio_ready = false;
		double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
		if ((frame_pts_seconds <= video_pts_seconds)
			|| (recent_pts_diff > 1.5)
			|| packet_status.video_eof || packet_status.end_of_file) {
			// Video stream is past this frame (so it must be done)
			// OR video stream is too far behind, missing, or end-of-file
			is_video_ready = true;
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames (video ready)",
													 "frame_number", f->number,
													 "frame_pts_seconds", frame_pts_seconds,
													 "video_pts_seconds", video_pts_seconds,
													 "recent_pts_diff", recent_pts_diff);
			if (info.has_video && !f->has_image_data) {
				// Frame has no image data (copy from previous frame)
				// Loop backwards through final frames (looking for the nearest, previous frame image)
				for (int64_t previous_frame = requested_frame - 1; previous_frame > 0; previous_frame--) {
					std::shared_ptr<Frame> previous_frame_instance = final_cache.GetFrame(previous_frame);
					if (previous_frame_instance && previous_frame_instance->has_image_data) {
						// Copy image from last decoded frame
						f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
						break;
					}
				}

				// Fallbacks: last decoded video frame, then a solid black image
				if (last_video_frame && !f->has_image_data) {
					// Copy image from last decoded frame
					f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
				} else if (!f->has_image_data) {
					f->AddColor("#000000");
				}
			}
		}

		double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
		if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
			|| (recent_pts_diff > 1.5)
			|| packet_status.audio_eof || packet_status.end_of_file) {
			// Audio stream is past this frame (so it must be done)
			// OR audio stream is too far behind, missing, or end-of-file
			// Adding a bit of margin here, to allow for partial audio packets
			is_audio_ready = true;
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames (audio ready)",
													 "frame_number", f->number,
													 "frame_pts_seconds", frame_pts_seconds,
													 "audio_pts_seconds", audio_pts_seconds,
													 "audio_pts_diff", audio_pts_diff,
													 "recent_pts_diff", recent_pts_diff);
		}
		bool is_seek_trash = IsPartialFrame(f->number);

		// Adjust for available streams (a missing stream is always "ready")
		if (!info.has_video) is_video_ready = true;
		if (!info.has_audio) is_audio_ready = true;

		// Debug output
		ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames",
												 "frame_number", f->number,
												 "is_video_ready", is_video_ready,
												 "is_audio_ready", is_audio_ready,
												 "video_eof", packet_status.video_eof,
												 "audio_eof", packet_status.audio_eof,
												 "end_of_file", packet_status.end_of_file);

		// Check if working frame is final (EOF finalizes everything remaining)
		if ((!packet_status.end_of_file && is_video_ready && is_audio_ready) || packet_status.end_of_file || is_seek_trash) {
			// Debug output
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames (mark frame as final)",
													 "requested_frame", requested_frame,
													 "f->number", f->number,
													 "is_seek_trash", is_seek_trash,
													 "Working Cache Count", working_cache.Count(),
													 "Final Cache Count", final_cache.Count(),
													 "end_of_file", packet_status.end_of_file);

			if (!is_seek_trash) {
				// Move frame to final cache
				final_cache.Add(f);

				// Remove frame from working cache
				working_cache.Remove(f->number);

				// Update last frame processed
				last_frame = f->number;
			} else {
				// Seek trash, so delete the frame from the working cache, and never add it to the final cache.
				working_cache.Remove(f->number);
			}

		}
	}

	// Clear vector of frames (release shared_ptr references and storage)
	working_frames.clear();
	working_frames.shrink_to_fit();
}
2417
// Check for the correct frames per second (FPS) value by scanning the 1st few seconds of video packets.
// Counts video packets per whole second over the first ~3 seconds, then derives an
// integer FPS from the average. Also re-derives duration, length, and bit rate.
void FFmpegReader::CheckFPS() {
	if (check_fps) {
		// Do not check FPS more than 1 time
		return;
	} else {
		check_fps = true;
	}

	// Per-second packet counters for the first 3 seconds of video
	int frames_per_second[3] = {0,0,0};
	int max_fps_index = sizeof(frames_per_second) / sizeof(frames_per_second[0]);
	int fps_index = 0;

	int all_frames_detected = 0;
	int starting_frames_detected = 0;

	// Loop through the stream
	while (true) {
		// Get the next packet (if any)
		if (GetNextPacket() < 0)
			// Break loop when no more packets found
			break;

		// Video packet
		if (packet->stream_index == videoStream) {
			// Get the video packet start time (in seconds)
			double video_seconds = (double(GetPacketPTS()) * info.video_timebase.ToDouble()) + pts_offset_seconds;
			fps_index = int(video_seconds); // truncate float timestamp to int (second 1, second 2, second 3)

			// Is this video packet from the first few seconds?
			if (fps_index >= 0 && fps_index < max_fps_index) {
				// Yes, keep track of how many frames per second (over the first few seconds)
				starting_frames_detected++;
				frames_per_second[fps_index]++;
			}

			// Track all video packets detected
			all_frames_detected++;
		}
	}

	// Calculate FPS (based on the first few seconds of video packets).
	// fps_index now holds the (truncated) second of the last packet seen.
	float avg_fps = 30.0;
	if (starting_frames_detected > 0 && fps_index > 0) {
		// Average packets-per-second over however many whole seconds were observed (capped at 3)
		avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
	}

	// Verify average FPS is a reasonable value
	if (avg_fps < 8.0) {
		// Invalid FPS assumed, so switching to a sane default FPS instead
		avg_fps = 30.0;
	}

	// Update FPS (truncate average FPS to Integer)
	info.fps = Fraction(int(avg_fps), 1);

	// Update Duration and Length
	if (all_frames_detected > 0) {
		// Use all video frames detected to calculate # of frames
		info.video_length = all_frames_detected;
		info.duration = all_frames_detected / avg_fps;
	} else {
		// Use previous duration to calculate # of frames
		info.video_length = info.duration * avg_fps;
	}

	// Update video bit rate (approximation: total file size spread over the duration)
	info.video_bit_rate = info.file_size / info.duration;
}
2487
// Remove AVFrame from cache (and deallocate its memory)
void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
	// Remove pFrame (if exists)
	if (remove_frame) {
		// Free the pixel buffer owned by the frame
		av_freep(&remove_frame->data[0]);
#ifndef WIN32
		// NOTE(review): the AVFrame struct itself is only freed on non-Windows
		// builds; on WIN32 only the pixel buffer above is released. Presumably a
		// workaround for a platform-specific crash -- confirm before changing.
		AV_FREE_FRAME(&remove_frame);
#endif
	}
}
2499
// Remove AVPacket from cache (and deallocate its memory)
void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
	// Release the packet's payload (AV_FREE_PACKET unreferences the buffer)
	AV_FREE_PACKET(remove_packet);

	// Delete the heap-allocated AVPacket object itself
	delete remove_packet;
}
2508
2509// Generate JSON string of this object
2510std::string FFmpegReader::Json() const {
2511
2512 // Return formatted string
2513 return JsonValue().toStyledString();
2514}
2515
2516// Generate Json::Value for this object
2517Json::Value FFmpegReader::JsonValue() const {
2518
2519 // Create root json object
2520 Json::Value root = ReaderBase::JsonValue(); // get parent properties
2521 root["type"] = "FFmpegReader";
2522 root["path"] = path;
2523 switch (duration_strategy) {
2525 root["duration_strategy"] = "VideoPreferred";
2526 break;
2528 root["duration_strategy"] = "AudioPreferred";
2529 break;
2531 default:
2532 root["duration_strategy"] = "LongestStream";
2533 break;
2534 }
2535
2536 // return JsonValue
2537 return root;
2538}
2539
2540// Load JSON string into this object
2541void FFmpegReader::SetJson(const std::string value) {
2542
2543 // Parse JSON string into JSON objects
2544 try {
2545 const Json::Value root = openshot::stringToJson(value);
2546 // Set all values that match
2547 SetJsonValue(root);
2548 }
2549 catch (const std::exception& e) {
2550 // Error parsing JSON (or missing keys)
2551 throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
2552 }
2553}
2554
2555// Load Json::Value into this object
2556void FFmpegReader::SetJsonValue(const Json::Value root) {
2557
2558 // Set parent data
2560
2561 // Set data from Json (if key is found)
2562 if (!root["path"].isNull())
2563 path = root["path"].asString();
2564 if (!root["duration_strategy"].isNull()) {
2565 const std::string strategy = root["duration_strategy"].asString();
2566 if (strategy == "VideoPreferred") {
2567 duration_strategy = DurationStrategy::VideoPreferred;
2568 } else if (strategy == "AudioPreferred") {
2569 duration_strategy = DurationStrategy::AudioPreferred;
2570 } else {
2571 duration_strategy = DurationStrategy::LongestStream;
2572 }
2573 }
2574
2575 // Re-Open path, and re-init everything (if needed)
2576 if (is_open) {
2577 Close();
2578 Open();
2579 }
2580}
Shared helpers for Crop effect scaling logic.
Header file for all Exception classes.
AVPixelFormat hw_de_av_pix_fmt_global
AVHWDeviceType hw_de_av_device_type_global
int hw_de_on
Header file for FFmpegReader class.
Header file for FFmpegUtilities.
#define AV_FREE_CONTEXT(av_context)
#define SWR_INIT(ctx)
#define AV_FREE_FRAME(av_frame)
#define SWR_CONVERT(ctx, out, linesize, out_count, in, linesize2, in_count)
#define SWR_ALLOC()
#define SWR_CLOSE(ctx)
#define AV_GET_CODEC_TYPE(av_stream)
#define PixelFormat
#define AV_GET_CODEC_PIXEL_FORMAT(av_stream, av_context)
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec)
#define AV_FIND_DECODER_CODEC_ID(av_stream)
#define AV_ALLOCATE_FRAME()
#define AV_REGISTER_ALL
#define PIX_FMT_RGBA
#define SWR_FREE(ctx)
#define AV_COPY_PICTURE_DATA(av_frame, buffer, pix_fmt, width, height)
#define AV_FREE_PACKET(av_packet)
#define SWRCONTEXT
#define AVCODEC_REGISTER_ALL
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context)
#define AV_ALLOCATE_IMAGE(av_frame, pix_fmt, width, height)
#define AV_GET_SAMPLE_FORMAT(av_stream, av_context)
#define AV_RESET_FRAME(av_frame)
AVDictionary * opts
if(!codec) codec
Cross-platform helper to encourage returning freed memory to the OS.
#define FF_VIDEO_NUM_PROCESSORS
#define FF_AUDIO_NUM_PROCESSORS
Header file for Timeline class.
Header file for ZeroMQ-based Logger class.
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number)
Get a frame from the cache.
openshot::Keyframe scale_x
Curve representing the horizontal scaling in percent (0 to 1).
Definition Clip.h:316
openshot::TimelineBase * ParentTimeline() override
Get the associated Timeline pointer (if any).
Definition Clip.h:294
openshot::Keyframe scale_y
Curve representing the vertical scaling in percent (0 to 1).
Definition Clip.h:317
openshot::ScaleType scale
The scale determines how a clip should be resized to fit its parent.
Definition Clip.h:177
double Y
The Y value of the coordinate (usually representing the value of the property being animated).
Definition Coordinate.h:41
void Open() override
Open File - which is called by the constructor automatically.
FFmpegReader(const std::string &path, bool inspect_reader=true)
Constructor for FFmpegReader.
Json::Value JsonValue() const override
Generate Json::Value for this object.
bool GetIsDurationKnown()
Return true if frame can be read with GetFrame().
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
CacheMemory final_cache
Final cache object used to hold final frames.
virtual ~FFmpegReader()
Destructor.
std::string Json() const override
Generate JSON string of this object.
std::shared_ptr< openshot::Frame > GetFrame(int64_t requested_frame) override
void Close() override
Close File.
void SetJson(const std::string value) override
Load JSON string into this object.
int GetSamplesPerFrame(openshot::Fraction fps, int sample_rate, int channels)
Calculate the # of samples per video frame (for the current frame number).
Definition Frame.cpp:484
Exception when no valid codec is found for a file.
Definition Exceptions.h:173
Exception for files that can not be found or opened.
Definition Exceptions.h:188
Exception for invalid JSON.
Definition Exceptions.h:218
Point GetMaxPoint() const
Get max point (by Y coordinate).
Definition KeyFrame.cpp:245
Exception when no streams are found in the file.
Definition Exceptions.h:286
Coordinate co
This is the primary coordinate.
Definition Point.h:66
openshot::ReaderInfo info
Information about the current media file.
Definition ReaderBase.h:88
virtual void SetJsonValue(const Json::Value root)=0
Load Json::Value into this object.
virtual Json::Value JsonValue() const =0
Generate Json::Value for this object.
std::recursive_mutex getFrameMutex
Mutex for multiple threads.
Definition ReaderBase.h:79
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL).
Exception when a reader is closed, and a frame is requested.
Definition Exceptions.h:364
int DE_LIMIT_WIDTH_MAX
Maximum columns that hardware decode can handle.
Definition Settings.h:77
int HW_DE_DEVICE_SET
Which GPU to use to decode (0 is the first).
Definition Settings.h:80
int DE_LIMIT_HEIGHT_MAX
Maximum rows that hardware decode can handle.
Definition Settings.h:74
static Settings * Instance()
Create or get an instance of this logger singleton (invoke the class with this method).
Definition Settings.cpp:23
int HARDWARE_DECODER
Use video codec for faster video decoding (if supported).
Definition Settings.h:62
int preview_height
Optional preview height of timeline image. If your preview window is smaller than the timeline,...
int preview_width
Optional preview width of timeline image. If your preview window is smaller than the timeline,...
void AppendDebugMethod(std::string method_name, std::string arg1_name="", float arg1_value=-1.0, std::string arg2_name="", float arg2_value=-1.0, std::string arg3_name="", float arg3_value=-1.0, std::string arg4_name="", float arg4_value=-1.0, std::string arg5_name="", float arg5_value=-1.0, std::string arg6_name="", float arg6_value=-1.0)
Append debug information.
static ZmqLogger * Instance()
Create or get an instance of this logger singleton (invoke the class with this method).
Definition ZmqLogger.cpp:35
This namespace is the default namespace for all code in the openshot library.
Definition Compressor.h:29
@ SCALE_FIT
Scale the clip until either height or width fills the canvas (with no cropping).
Definition Enums.h:38
@ SCALE_STRETCH
Scale the clip until both height and width fill the canvas (distort to fit).
Definition Enums.h:39
@ SCALE_CROP
Scale the clip until both height and width fill the canvas (cropping the overlap).
Definition Enums.h:37
ChannelLayout
This enumeration determines the audio channel layout (such as stereo, mono, 5 point surround,...
DurationStrategy
This enumeration determines which duration source to favor.
Definition Enums.h:60
@ VideoPreferred
Prefer the video stream's duration, fallback to audio then container.
Definition Enums.h:62
@ LongestStream
Use the longest value from video, audio, or container.
Definition Enums.h:61
@ AudioPreferred
Prefer the audio stream's duration, fallback to video then container.
Definition Enums.h:63
bool TrimMemoryToOS(bool force) noexcept
Attempt to return unused heap memory to the operating system.
void ApplyCropResizeScale(Clip *clip, int source_width, int source_height, int &max_width, int &max_height)
Scale the requested max_width / max_height based on the Crop resize amount, capped by source size.
const Json::Value stringToJson(const std::string value)
Definition Json.cpp:16
bool is_near(AudioLocation location, int samples_per_frame, int64_t amount)
int width
The width of the video (in pixels).
Definition ReaderBase.h:46