/* Motion vector precision; copied into both the estimator descriptor and
 * the motion vector heap descriptor when those objects are created. */
67 D3D12_VIDEO_MOTION_ESTIMATOR_VECTOR_PRECISION
precision;
/* Detect any block size other than the two values handled below (8, 16). */
94 if (
s->block_size != 8 &&
s->block_size != 16) {
/* Translate the numeric block size into the matching D3D12 enum value. */
100 if (
s->block_size == 8)
101 s->d3d12_block_size = D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_8X8;
103 s->d3d12_block_size = D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_16X16;
/* The vector precision is fixed to quarter-pel. */
106 s->precision = D3D12_VIDEO_MOTION_ESTIMATOR_VECTOR_PRECISION_QUARTER_PEL;
/* Descriptor for the command queue created below; it uses the
 * video-encode command list type. */
115 D3D12_COMMAND_QUEUE_DESC queue_desc = {
116 .Type = D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE,
118 .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE,
/* Fence created with an initial value of 0 and stored in s->fence. */
123 hr = ID3D12Device_CreateFence(
s->device, 0, D3D12_FENCE_FLAG_NONE,
124 &IID_ID3D12Fence, (
void **)&
s->fence);
/* Unnamed Win32 event: default security attributes, auto-reset,
 * initially non-signaled. */
130 s->fence_event = CreateEvent(
NULL, FALSE, FALSE,
NULL);
/* A NULL handle means the event could not be created. */
131 if (!
s->fence_event) {
/* Queue, allocator and command list below all use the VIDEO_ENCODE type. */
137 hr = ID3D12Device_CreateCommandQueue(
s->device, &queue_desc,
138 &IID_ID3D12CommandQueue, (
void **)&
s->command_queue);
145 hr = ID3D12Device_CreateCommandAllocator(
s->device, D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE,
146 &IID_ID3D12CommandAllocator, (
void **)&
s->command_allocator);
/* Node mask 0, no initial pipeline state (NULL). */
153 hr = ID3D12Device_CreateCommandList(
s->device, 0, D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE,
154 s->command_allocator,
NULL, &IID_ID3D12VideoEncodeCommandList,
155 (
void **)&
s->command_list);
/* Close the list straight after creation; the per-frame code resets it
 * before recording. */
161 hr = ID3D12VideoEncodeCommandList_Close(
s->command_list);
/* Zero-initialized descriptors for the capability query, the motion
 * estimator and the motion vector heap. */
174 D3D12_FEATURE_DATA_VIDEO_MOTION_ESTIMATOR feature_data = {0};
175 D3D12_VIDEO_MOTION_ESTIMATOR_DESC me_desc = {0};
176 D3D12_VIDEO_MOTION_VECTOR_HEAP_DESC heap_desc = {0};
/* Capability query input: node 0, the frames context's format and a size
 * range whose minimum and maximum are both exactly width x height.
 * The block-size and precision flags are cleared here and tested after the
 * query, so the call presumably fills them in. */
180 feature_data.NodeIndex = 0;
181 feature_data.InputFormat =
s->frames_ctx->format;
182 feature_data.BlockSizeFlags = 0;
183 feature_data.PrecisionFlags = 0;
184 feature_data.SizeRange.MaxWidth =
width;
185 feature_data.SizeRange.MaxHeight =
height;
186 feature_data.SizeRange.MinWidth =
width;
187 feature_data.SizeRange.MinHeight =
height;
189 hr = ID3D12VideoDevice1_CheckFeatureSupport(
s->video_device,
190 D3D12_FEATURE_VIDEO_MOTION_ESTIMATOR,
191 &feature_data,
sizeof(feature_data));
/* Map the configured block size enum to its capability flag bit. */
198 D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_FLAGS requested_block_flag =
199 (
s->d3d12_block_size == D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_8X8) ?
200 D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_FLAG_8X8 :
201 D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_FLAG_16X16;
/* Report an error when the requested block-size bit is not among the
 * reported flags. */
203 if (!(feature_data.BlockSizeFlags & requested_block_flag)) {
204 av_log(
ctx,
AV_LOG_ERROR,
"Requested block size (%dx%d) not supported by device (supported flags: 0x%x)\n",
205 s->block_size,
s->block_size, feature_data.BlockSizeFlags);
/* Same test for the quarter-pel precision bit. */
209 if (!(feature_data.PrecisionFlags & D3D12_VIDEO_MOTION_ESTIMATOR_VECTOR_PRECISION_FLAG_QUARTER_PEL)) {
211 feature_data.PrecisionFlags);
216 s->block_size,
s->block_size);
/* Estimator descriptor; it reuses the size range from the capability query. */
219 me_desc.NodeMask = 0;
220 me_desc.InputFormat =
s->frames_ctx->format;
221 me_desc.BlockSize =
s->d3d12_block_size;
222 me_desc.Precision =
s->precision;
223 me_desc.SizeRange = feature_data.SizeRange;
225 hr = ID3D12VideoDevice1_CreateVideoMotionEstimator(
s->video_device, &me_desc,
NULL,
226 &IID_ID3D12VideoMotionEstimator,
227 (
void **)&
s->motion_estimator);
/* The motion vector heap is described with the same format, block size,
 * precision and size range as the estimator. */
234 heap_desc.NodeMask = 0;
235 heap_desc.InputFormat =
s->frames_ctx->format;
236 heap_desc.BlockSize =
s->d3d12_block_size;
237 heap_desc.Precision =
s->precision;
238 heap_desc.SizeRange = feature_data.SizeRange;
240 hr = ID3D12VideoDevice1_CreateVideoMotionVectorHeap(
s->video_device, &heap_desc,
NULL,
241 &IID_ID3D12VideoMotionVectorHeap,
242 (
void **)&
s->motion_vector_heap);
/* Number of blocks per row and per column, rounded up so that partially
 * covered blocks at the right and bottom edges are counted. */
250 int mb_width = (
width +
s->block_size - 1) /
s->block_size;
251 int mb_height = (
height +
s->block_size - 1) /
s->block_size;
/* Default-heap 2D texture description with R16G16_SINT texels (two signed
 * 16-bit components per texel). */
253 D3D12_HEAP_PROPERTIES heap_props_default = {.Type = D3D12_HEAP_TYPE_DEFAULT};
254 D3D12_RESOURCE_DESC texture_desc = {
255 .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
259 .DepthOrArraySize = 1,
261 .Format = DXGI_FORMAT_R16G16_SINT,
262 .SampleDesc = {.Count = 1, .Quality = 0},
263 .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
264 .Flags = D3D12_RESOURCE_FLAG_NONE,
/* Two textures share this description: one receives the backward resolve
 * output, the other the forward resolve output. Both start in COMMON state. */
267 hr = ID3D12Device_CreateCommittedResource(
s->device, &heap_props_default, D3D12_HEAP_FLAG_NONE,
268 &texture_desc, D3D12_RESOURCE_STATE_COMMON,
NULL,
269 &IID_ID3D12Resource, (
void **)&
s->resolved_mv_texture_back);
275 hr = ID3D12Device_CreateCommittedResource(
s->device, &heap_props_default, D3D12_HEAP_FLAG_NONE,
276 &texture_desc, D3D12_RESOURCE_STATE_COMMON,
NULL,
277 &IID_ID3D12Resource, (
void **)&
s->resolved_mv_texture_fwd);
/* Ask the device for the copyable footprint of subresource 0; only the
 * layout and the total byte size are requested (row count and row size
 * outputs are NULL). */
286 D3D12_PLACED_SUBRESOURCE_FOOTPRINT temp_layout;
287 UINT64 temp_total_size;
289 ID3D12Device_GetCopyableFootprints(
s->device, &texture_desc, 0, 1, 0,
290 &temp_layout,
NULL,
NULL, &temp_total_size);
/* The readback buffers are sized from that total. */
292 s->readback_buffer_size = temp_total_size;
295 (
unsigned long long)
s->readback_buffer_size, mb_width, mb_height, temp_layout.Footprint.RowPitch);
/* Readback-heap buffers, created in COPY_DEST state, one per direction. */
297 D3D12_HEAP_PROPERTIES heap_props_readback = {.Type = D3D12_HEAP_TYPE_READBACK};
298 D3D12_RESOURCE_DESC buffer_desc = {
299 .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
301 .Width =
s->readback_buffer_size,
303 .DepthOrArraySize = 1,
305 .Format = DXGI_FORMAT_UNKNOWN,
306 .SampleDesc = {.Count = 1, .Quality = 0},
307 .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
308 .Flags = D3D12_RESOURCE_FLAG_NONE,
311 hr = ID3D12Device_CreateCommittedResource(
s->device, &heap_props_readback, D3D12_HEAP_FLAG_NONE,
312 &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST,
NULL,
313 &IID_ID3D12Resource, (
void **)&
s->readback_buffer_back);
319 hr = ID3D12Device_CreateCommittedResource(
s->device, &heap_props_readback, D3D12_HEAP_FLAG_NONE,
320 &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST,
NULL,
321 &IID_ID3D12Resource, (
void **)&
s->readback_buffer_fwd);
/* Descriptor for a second command queue, of DIRECT type, on which the
 * texture-to-buffer copy list is executed. */
328 D3D12_COMMAND_QUEUE_DESC copy_queue_desc = {
329 .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
331 .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE,
335 hr = ID3D12Device_CreateCommandQueue(
s->device, ©_queue_desc,
336 &IID_ID3D12CommandQueue, (
void **)&
s->copy_command_queue);
/* Allocator and graphics command list of DIRECT type for the copy work. */
342 hr = ID3D12Device_CreateCommandAllocator(
s->device, D3D12_COMMAND_LIST_TYPE_DIRECT,
343 &IID_ID3D12CommandAllocator, (
void **)&
s->copy_command_allocator);
/* Node mask 0, no initial pipeline state (NULL). */
349 hr = ID3D12Device_CreateCommandList(
s->device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT,
350 s->copy_command_allocator,
NULL, &IID_ID3D12GraphicsCommandList,
351 (
void **)&
s->copy_command_list);
/* Close the list straight after creation; the per-frame code resets it
 * before recording. */
357 hr = ID3D12GraphicsCommandList_Close(
s->copy_command_list);
/* Check the hardware frames reference (the handling of a NULL reference is
 * not visible in this excerpt). */
389 if (!
s->hw_frames_ref)
/* Keep the hwctx pointer of the hardware frames context in s->frames_ctx. */
392 s->frames_ctx = hw_frames_ctx->
hwctx;
/* Same check for the hardware device reference. */
394 if (!
s->hw_device_ref)
/* Cache the D3D12 device taken from the device context. */
398 s->device =
s->device_ctx->device;
/* Obtain the ID3D12VideoDevice1 interface from the device context's video
 * device; the capability query and the estimator/heap creation go through
 * s->video_device. */
406 hr = ID3D12VideoDevice_QueryInterface(
s->device_ctx->video_device, &IID_ID3D12VideoDevice1,
407 (
void **)&
s->video_device);
/* Read the value the fence has currently reached. */
428 uint64_t completion = ID3D12Fence_GetCompletedValue(
s->fence);
/* Block only when the fence has not yet reached the last signaled value. */
430 if (completion < s->fence_value) {
/* Ask for fence_event to be signaled once the fence reaches fence_value. */
431 if (FAILED(ID3D12Fence_SetEventOnCompletion(
s->fence,
s->fence_value,
s->fence_event)))
/* Wait without timeout; the wait is not alertable (last argument FALSE). */
433 WaitForSingleObjectEx(
s->fence_event, INFINITE, FALSE);
/* Remaining parameters: the resource to transition and the states it moves
 * between (the start of this signature is outside this excerpt). */
440 ID3D12Resource *resource,
441 D3D12_RESOURCE_STATES state_before,
442 D3D12_RESOURCE_STATES state_after)
/* Fill *barrier as a plain transition barrier that applies to every
 * subresource of the resource. */
444 barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
445 barrier->Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
446 barrier->Transition.pResource = resource;
447 barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
448 barrier->Transition.StateBefore = state_before;
449 barrier->Transition.StateAfter = state_after;
/* (x, y): top-left corner of the block; (x_mv, y_mv): top-left corner of the
 * matched position; dir: non-zero or zero, mapped to mv->source below. */
453 int x,
int y,
int x_mv,
int y_mv,
int dir)
/* Destination and source positions are stored as block centres: the
 * top-left corner plus half the block size. */
457 mv->dst_x = x + (mb_size >> 1);
458 mv->dst_y = y + (mb_size >> 1);
459 mv->src_x = x_mv + (mb_size >> 1);
460 mv->src_y = y_mv + (mb_size >> 1);
/* source is 1 when dir is non-zero and -1 when dir is zero. */
461 mv->source = dir ? 1 : -1;
/* Displacement from the block to its match, stored with a scale of 1. */
463 mv->motion_x = x_mv - x;
464 mv->motion_y = y_mv - y;
465 mv->motion_scale = 1;
471 uint8_t *mapped_data =
NULL;
476 int mb_x, mb_y, mv_idx;
477 int mb_width, mb_height;
/* Direction 0 reads the backward readback buffer, any other value the
 * forward one. */
479 ID3D12Resource *
buffer = (direction == 0) ?
s->readback_buffer_back :
s->readback_buffer_fwd;
/* Map subresource 0 with a NULL read range to get a CPU pointer. */
482 hr = ID3D12Resource_Map(
buffer, 0,
NULL, (
void **)&mapped_data);
484 av_log(
ctx,
AV_LOG_ERROR,
"Failed to map readback buffer (dir=%d, hr=0x%lx)\n", direction, (
long)hr);
/* Block grid of the output frame, rounded up. */
497 mb_width = (
out->width +
s->block_size - 1) /
s->block_size;
498 mb_height = (
out->height +
s->block_size - 1) /
s->block_size;
/* Each direction gets its own run of mb_width * mb_height entries in mvs. */
501 mv_idx = direction * mb_width * mb_height;
/* Treat the mapped bytes as int16_t values; each block uses two
 * consecutive values (x, then y). */
514 d3d12_mvs = (int16_t *)mapped_data;
516 for (mb_y = 0; mb_y < mb_height; mb_y++) {
517 for (mb_x = 0; mb_x < mb_width; mb_x++) {
/* Pixel position of the block's top-left corner. */
518 const int x_mb = mb_x *
s->block_size;
519 const int y_mb = mb_y *
s->block_size;
/* NOTE(review): rows are addressed with a stride of mb_width texels, while
 * the copy into this buffer uses the footprint from GetCopyableFootprints,
 * which has its own RowPitch. Confirm the two strides agree for every
 * frame width. */
520 const int mv_offset = (mb_y * mb_width + mb_x) * 2;
525 int16_t mv_x_qpel = d3d12_mvs[mv_offset + 0];
526 int16_t mv_y_qpel = d3d12_mvs[mv_offset + 1];
/* Values are named quarter-pel; dividing by 4 gives the offset added to the
 * block position (C integer division truncates toward zero). */
531 int src_x = x_mb + (mv_x_qpel / 4);
532 int src_y = y_mb + (mv_y_qpel / 4);
/* Store one vector and advance to the next slot. */
536 add_mv_data(&mvs[mv_idx++],
s->block_size, x_mb, y_mb, src_x, src_y, direction);
539 mb_x, mb_y, direction, mv_x_qpel, mv_y_qpel,
540 mv_x_qpel / 4, mv_y_qpel / 4);
547 mb_width * mb_height, direction);
561 int mb_width, mb_height, mb_count;
/* Branch taken while the filter has not been marked initialized (its body
 * is not visible in this excerpt). */
563 if (!
s->initialized) {
/* Slide the frame window: current becomes previous, next becomes current. */
573 s->prev_frame =
s->cur_frame;
574 s->cur_frame =
s->next_frame;
/* Block grid of the frame, rounded up, and the total block count. */
591 mb_width = (
frame->width +
s->block_size - 1) /
s->block_size;
592 mb_height = (
frame->height +
s->block_size - 1) /
s->block_size;
593 mb_count = mb_width * mb_height;
/* Reset the allocator, then the command list, before recording. */
610 hr = ID3D12CommandAllocator_Reset(
s->command_allocator);
617 hr = ID3D12VideoEncodeCommandList_Reset(
s->command_list,
s->command_allocator);
/* Room for three barriers; the count starts at two. Each barrier moves a
 * resource from COMMON to VIDEO_ENCODE_READ. */
625 D3D12_RESOURCE_BARRIER barriers[3];
626 int barrier_count = 2;
629 D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
631 D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
635 D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
639 ID3D12VideoEncodeCommandList_ResourceBarrier(
s->command_list, barrier_count, barriers);
/* Backward estimation: current frame as input, previous frame as reference,
 * no hint heap. */
642 D3D12_VIDEO_MOTION_ESTIMATOR_INPUT input_back = {
643 .pInputTexture2D = cur_hwframe->
texture,
644 .InputSubresourceIndex = 0,
645 .pReferenceTexture2D = prev_hwframe->
texture,
646 .ReferenceSubresourceIndex = 0,
647 .pHintMotionVectorHeap =
NULL,
/* The estimation output is written to the motion vector heap. */
650 D3D12_VIDEO_MOTION_ESTIMATOR_OUTPUT
output = {
651 .pMotionVectorHeap =
s->motion_vector_heap,
654 ID3D12VideoEncodeCommandList_EstimateMotion(
s->command_list,
s->motion_estimator,
/* Resolve input: the heap plus the current frame's pixel dimensions. The
 * same structure is passed to both resolve calls. */
657 D3D12_RESOLVE_VIDEO_MOTION_VECTOR_HEAP_INPUT resolve_input = {
658 .pMotionVectorHeap =
s->motion_vector_heap,
659 .PixelWidth =
s->cur_frame->width,
660 .PixelHeight =
s->cur_frame->height,
/* Resolve the backward result into its own texture at the origin of
 * subresource 0. */
663 D3D12_RESOLVE_VIDEO_MOTION_VECTOR_HEAP_OUTPUT resolve_output_back = {
664 .pMotionVectorTexture2D =
s->resolved_mv_texture_back,
665 .MotionVectorCoordinate = {.X = 0, .Y = 0, .Z = 0, .SubresourceIndex = 0},
668 ID3D12VideoEncodeCommandList_ResolveMotionVectorHeap(
s->command_list,
669 &resolve_output_back, &resolve_input);
/* Forward estimation: same input frame, next frame as reference. */
677 D3D12_VIDEO_MOTION_ESTIMATOR_INPUT input_fwd = {
678 .pInputTexture2D = cur_hwframe->
texture,
679 .InputSubresourceIndex = 0,
680 .pReferenceTexture2D = next_hwframe->
texture,
681 .ReferenceSubresourceIndex = 0,
682 .pHintMotionVectorHeap =
NULL,
685 ID3D12VideoEncodeCommandList_EstimateMotion(
s->command_list,
s->motion_estimator,
/* Resolve the forward result into the forward texture. */
688 D3D12_RESOLVE_VIDEO_MOTION_VECTOR_HEAP_OUTPUT resolve_output_fwd = {
689 .pMotionVectorTexture2D =
s->resolved_mv_texture_fwd,
690 .MotionVectorCoordinate = {.X = 0, .Y = 0, .Z = 0, .SubresourceIndex = 0},
693 ID3D12VideoEncodeCommandList_ResolveMotionVectorHeap(
s->command_list,
694 &resolve_output_fwd, &resolve_input);
/* Swap before/after on every barrier so the same array transitions the
 * resources back to the state they started in. */
700 for (
int i = 0;
i < barrier_count;
i++)
701 FFSWAP(D3D12_RESOURCE_STATES, barriers[
i].Transition.StateBefore, barriers[
i].Transition.StateAfter);
703 ID3D12VideoEncodeCommandList_ResourceBarrier(
s->command_list, barrier_count, barriers);
/* End recording. */
706 hr = ID3D12VideoEncodeCommandList_Close(
s->command_list);
/* Make the encode queue wait on the fence of each frame's own sync context
 * (current, previous, next) before the recorded work runs. */
714 hr = ID3D12CommandQueue_Wait(
s->command_queue, cur_hwframe->
sync_ctx.
fence,
722 hr = ID3D12CommandQueue_Wait(
s->command_queue, prev_hwframe->
sync_ctx.
fence,
731 hr = ID3D12CommandQueue_Wait(
s->command_queue, next_hwframe->
sync_ctx.
fence,
/* Submit the single recorded command list. */
741 ID3D12CommandQueue_ExecuteCommandLists(
s->command_queue, 1, (ID3D12CommandList **)&
s->command_list);
/* Signal s->fence with the next fence value (pre-incremented). */
744 hr = ID3D12CommandQueue_Signal(
s->command_queue,
s->fence, ++
s->fence_value);
/* Reset the copy allocator, then the copy list (no initial pipeline state). */
759 hr = ID3D12CommandAllocator_Reset(
s->copy_command_allocator);
766 hr = ID3D12GraphicsCommandList_Reset(
s->copy_command_list,
s->copy_command_allocator,
NULL);
/* Room for two barriers; the count starts at one and is set to two further
 * down. Each barrier moves a resource from COMMON to COPY_SOURCE. */
774 D3D12_RESOURCE_BARRIER copy_barriers[2];
775 int copy_barrier_count = 1;
778 D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
782 D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
783 copy_barrier_count = 2;
786 ID3D12GraphicsCommandList_ResourceBarrier(
s->copy_command_list, copy_barrier_count, copy_barriers);
/* Backward direction: fetch the texture description, derive the copyable
 * footprint of subresource 0, then copy the texture into the readback
 * buffer using that footprint. */
789 D3D12_RESOURCE_DESC texture_desc_back;
790 D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout_back;
791 UINT64 row_size_back, total_size_back;
/* GetDesc is invoked through the vtable directly. */
795 s->resolved_mv_texture_back->lpVtbl->GetDesc(
s->resolved_mv_texture_back, &texture_desc_back);
798 (
unsigned long long)texture_desc_back.Width, texture_desc_back.Height, texture_desc_back.Format);
801 ID3D12Device_GetCopyableFootprints(
s->device, &texture_desc_back, 0, 1, 0,
802 &layout_back, &num_rows_back, &row_size_back, &total_size_back);
805 (
unsigned long long)layout_back.Offset, layout_back.Footprint.Width, layout_back.Footprint.Height,
806 layout_back.Footprint.Depth, layout_back.Footprint.RowPitch);
/* Copy source: subresource 0 of the resolved backward texture. */
809 D3D12_TEXTURE_COPY_LOCATION src_back = {
810 .pResource =
s->resolved_mv_texture_back,
811 .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
812 .SubresourceIndex = 0
/* Copy destination: the backward readback buffer, addressed through the
 * placed footprint queried above. */
815 D3D12_TEXTURE_COPY_LOCATION dst_back = {
816 .pResource =
s->readback_buffer_back,
817 .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
820 .Footprint = layout_back.Footprint
/* Whole-subresource copy (NULL source box) to destination offset (0, 0, 0). */
825 ID3D12GraphicsCommandList_CopyTextureRegion(
s->copy_command_list, &dst_back, 0, 0, 0, &src_back,
NULL);
/* Forward direction: same sequence with the forward texture and buffer. */
830 D3D12_RESOURCE_DESC texture_desc_fwd;
831 D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout_fwd;
832 UINT64 row_size_fwd, total_size_fwd;
836 s->resolved_mv_texture_fwd->lpVtbl->GetDesc(
s->resolved_mv_texture_fwd, &texture_desc_fwd);
839 (
unsigned long long)texture_desc_fwd.Width, texture_desc_fwd.Height, texture_desc_fwd.Format);
842 ID3D12Device_GetCopyableFootprints(
s->device, &texture_desc_fwd, 0, 1, 0,
843 &layout_fwd, &num_rows_fwd, &row_size_fwd, &total_size_fwd);
846 (
unsigned long long)layout_fwd.Offset, layout_fwd.Footprint.Width, layout_fwd.Footprint.Height,
847 layout_fwd.Footprint.Depth, layout_fwd.Footprint.RowPitch);
849 D3D12_TEXTURE_COPY_LOCATION src_fwd = {
850 .pResource =
s->resolved_mv_texture_fwd,
851 .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
852 .SubresourceIndex = 0
855 D3D12_TEXTURE_COPY_LOCATION dst_fwd = {
856 .pResource =
s->readback_buffer_fwd,
857 .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
860 .Footprint = layout_fwd.Footprint
865 ID3D12GraphicsCommandList_CopyTextureRegion(
s->copy_command_list, &dst_fwd, 0, 0, 0, &src_fwd,
NULL);
/* Swap before/after on every barrier so the same array transitions the
 * textures back to the state they started in. */
869 for (
int i = 0;
i < copy_barrier_count;
i++)
870 FFSWAP(D3D12_RESOURCE_STATES, copy_barriers[
i].Transition.StateBefore, copy_barriers[
i].Transition.StateAfter);
872 ID3D12GraphicsCommandList_ResourceBarrier(
s->copy_command_list, copy_barrier_count, copy_barriers);
/* End recording. */
874 hr = ID3D12GraphicsCommandList_Close(
s->copy_command_list);
/* Submit the copy list on the copy queue. */
882 ID3D12CommandQueue_ExecuteCommandLists(
s->copy_command_queue, 1, (ID3D12CommandList **)&
s->copy_command_list);
/* Signal the same fence object the encode queue signals (s->fence), using
 * the next fence value (pre-incremented). */
885 hr = ID3D12CommandQueue_Signal(
s->copy_command_queue,
s->fence, ++
s->fence_value);
/* Release the Win32 event handle that the fence wait blocks on. */
939 CloseHandle(
s->fence_event);
/* Byte offset of option field x inside the filter's private context. */
#define OFFSET(x) offsetof(MEstimateD3D12Context, x)
/* Flag set used for this filter's options. The expansion is parenthesized so
 * the bitwise OR cannot regroup with operators at the point of use. */
#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
/* "mb_size": integer option stored in block_size, default 16, accepted range
 * 8..16. The range also admits the values in between; the filter's setup code
 * separately tests for values other than 8 and 16. */
965 {
"mb_size",
"macroblock size, only 8 and 16 are supported",
OFFSET(block_size),
AV_OPT_TYPE_INT, {.i64 = 16}, 8, 16,
FLAGS },
/* Public filter fields: the name, the description (wrapped in
 * NULL_IF_CONFIG_SMALL) and the private option class. */
972 .
p.
name =
"mestimate_d3d12",
973 .p.description =
NULL_IF_CONFIG_SMALL(
"Generate motion vectors using D3D12 hardware acceleration."),
974 .p.priv_class = &mestimate_d3d12_class,