/* NOTE(review): RGB_LINECACHE is not referenced in the visible part of this
 * file; presumably it is the number of lines kept in a cache on the RGB
 * path -- confirm against the code that uses this macro. */
40 #define RGB_LINECACHE 4
154 AVFrame *enc_in, VkImageView *enc_in_views,
155 FFVkBuffer *slice_data_buf, uint32_t slice_data_size,
166 0, slice_data_size*
f->slice_count,
167 VK_FORMAT_UNDEFINED);
169 enc_in, enc_in_views,
171 VK_IMAGE_LAYOUT_GENERAL,
176 VK_SHADER_STAGE_COMPUTE_BIT,
185 AVFrame *enc_in, VkImageView *enc_in_views,
195 enc_in, enc_in_views,
197 VK_IMAGE_LAYOUT_GENERAL,
202 0, fltmap_size*
f->slice_count,
203 VK_FORMAT_UNDEFINED);
207 VK_SHADER_STAGE_COMPUTE_BIT,
216 AVFrame *enc_in, VkImageView *enc_in_views,
226 enc_in, enc_in_views,
228 VK_IMAGE_LAYOUT_GENERAL,
233 0, units_size*
f->slice_count,
234 VK_FORMAT_UNDEFINED);
238 VK_SHADER_STAGE_COMPUTE_BIT,
260 uint32_t plane_state_size;
261 uint32_t slice_state_size;
262 uint32_t slice_data_size;
267 uint32_t remap_data_size = 0;
273 int has_inter = avctx->
gop_size > 1;
274 uint32_t context_count =
f->context_count[
f->context_model];
276 VkImageMemoryBarrier2 img_bar[37];
278 VkBufferMemoryBarrier2 buf_bar[8];
282 f->cur_enc_frame = pict;
291 f->slice_count =
f->max_slice_count;
295 plane_state_size = 8;
299 plane_state_size *= context_count;
300 slice_state_size = plane_state_size*
f->plane_count;
302 slice_data_size = 256;
303 slice_state_size += slice_data_size;
304 slice_state_size =
FFALIGN(slice_state_size, 8);
308 if (!slice_data_ref) {
311 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
312 NULL, slice_state_size*
f->slice_count,
313 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
327 remap_data_size = 4*(1 <<
desc->comp[0].depth)*
sizeof(uint32_t);
332 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
333 NULL, remap_data_size*
f->slice_count,
334 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
343 VkMemoryPropertyFlagBits out_buf_flags;
344 if (maxsize < fv->max_heap_size) {
345 out_buf_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
348 out_buf_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
350 out_buf_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
356 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
357 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
358 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
359 NULL, maxsize, out_buf_flags));
385 .plane_state_size = plane_state_size,
386 .key_frame =
f->key_frame,
388 .micro_version =
f->micro_version,
394 .slice_size_max = out_data_buf->
size /
f->slice_count,
398 for (
int i = 0;
i <
f->quant_table_count;
i++) {
401 f->quant_tables[
i][4][127];
408 memcpy(pd.
fmt_lut, (
int [4]) { 2, 1, 0, 3 }, 4*
sizeof(
int));
427 remap_data_ref =
NULL;
431 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
432 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
435 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
436 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
441 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
442 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
443 VK_ACCESS_SHADER_READ_BIT,
444 VK_IMAGE_LAYOUT_GENERAL,
445 VK_QUEUE_FAMILY_IGNORED);
447 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
448 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
449 .pImageMemoryBarriers = img_bar,
450 .imageMemoryBarrierCount = nb_img_bar,
458 slice_data_buf, slice_data_size, &pd));
462 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
463 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
464 0, slice_data_size*
f->slice_count);
470 remap_data_buf, remap_data_size, &pd));
473 remap_data_buf, remap_data_size, &pd));
478 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
479 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
480 0, remap_data_size*
f->slice_count);
484 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
485 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
486 .pBufferMemoryBarriers = buf_bar,
487 .bufferMemoryBarrierCount = nb_buf_bar,
496 0, slice_data_size*
f->slice_count,
497 VK_FORMAT_UNDEFINED);
502 0, remap_data_size*
f->slice_count,
503 VK_FORMAT_UNDEFINED);
507 VK_SHADER_STAGE_COMPUTE_BIT,
515 vkf->
layout[0] = VK_IMAGE_LAYOUT_UNDEFINED;
516 vkf->
access[0] = VK_ACCESS_2_NONE;
523 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
524 VK_PIPELINE_STAGE_2_CLEAR_BIT,
525 VK_ACCESS_2_TRANSFER_WRITE_BIT,
526 VK_IMAGE_LAYOUT_GENERAL,
527 VK_QUEUE_FAMILY_IGNORED);
528 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
529 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
530 .pImageMemoryBarriers = img_bar,
531 .imageMemoryBarrierCount = nb_img_bar,
535 vk->CmdClearColorImage(exec->
buf, vkf->
img[0], VK_IMAGE_LAYOUT_GENERAL,
536 &((VkClearColorValue) { 0 }),
537 1, &((VkImageSubresourceRange) {
538 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
549 0, slice_data_size*
f->slice_count,
550 VK_FORMAT_UNDEFINED);
556 VK_FORMAT_UNDEFINED);
560 VK_SHADER_STAGE_COMPUTE_BIT,
569 COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
570 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
571 0, slice_data_size*
f->slice_count);
577 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
578 COMPUTE_SHADER_BIT, SHADER_READ_BIT, NONE_KHR,
579 0, remap_data_size*
f->slice_count);
582 COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
583 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
584 slice_data_size*
f->slice_count, VK_WHOLE_SIZE);
587 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
588 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
589 slice_data_size*
f->slice_count, VK_WHOLE_SIZE);
592 fv->
optimize_rct ? VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT :
593 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
594 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
595 VK_ACCESS_SHADER_READ_BIT,
596 VK_IMAGE_LAYOUT_GENERAL,
597 VK_QUEUE_FAMILY_IGNORED);
601 VK_PIPELINE_STAGE_2_CLEAR_BIT,
602 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
603 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
604 VK_IMAGE_LAYOUT_GENERAL,
605 VK_QUEUE_FAMILY_IGNORED);
607 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
608 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
609 .pImageMemoryBarriers = img_bar,
610 .imageMemoryBarrierCount = nb_img_bar,
611 .pBufferMemoryBarriers = buf_bar,
612 .bufferMemoryBarrierCount = nb_buf_bar,
621 0, slice_data_size*
f->slice_count,
622 VK_FORMAT_UNDEFINED);
626 fd->
idx*
f->max_slice_count*
sizeof(uint32_t),
627 f->slice_count*
sizeof(uint32_t),
628 VK_FORMAT_UNDEFINED);
634 VK_FORMAT_UNDEFINED);
638 VK_IMAGE_LAYOUT_GENERAL,
644 VK_IMAGE_LAYOUT_GENERAL,
650 0, remap_data_size*
f->slice_count,
651 VK_FORMAT_UNDEFINED);
655 VK_SHADER_STAGE_COMPUTE_BIT,
679 VkBufferCopy *buf_regions,
int nb_regions,
693 VkBufferMemoryBarrier2 buf_bar[8];
697 VK_BUFFER_USAGE_TRANSFER_DST_BIT);
715 COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
716 TRANSFER_BIT, TRANSFER_READ_BIT, NONE_KHR,
718 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
719 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
720 .pBufferMemoryBarriers = buf_bar,
721 .bufferMemoryBarrierCount = nb_buf_bar,
725 for (
int i = 0;
i < nb_regions;
i++)
726 buf_regions[
i].dstOffset += mapped_buf->virtual_offset;
728 vk->CmdCopyBuffer(exec->
buf,
729 out_data_buf->
buf, mapped_buf->buf,
730 nb_regions, buf_regions);
753 uint32_t slice_size_max = out_data_buf->
size /
f->slice_count;
759 uint32_t rb_off = fd->
idx*
f->max_slice_count*
sizeof(uint32_t);
761 VkMappedMemoryRange invalidate_data = {
762 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
765 .size =
f->slice_count*
sizeof(uint32_t),
768 1, &invalidate_data);
774 for (
int i = 0;
i <
f->slice_count;
i++) {
775 uint32_t sl_len =
AV_RN32(rb +
i*4);
779 .srcOffset =
i*slice_size_max,
811 if (!(out_data_buf->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
812 VkMappedMemoryRange invalidate_data = {
813 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
814 .memory = out_data_buf->
mem,
816 .size = VK_WHOLE_SIZE,
819 1, &invalidate_data);
823 for (
int i = 0;
i <
f->slice_count;
i++) {
825 memcpy(
pkt->
data + region->dstOffset,
907 vk_frames = frames_ctx->
hwctx;
908 vk_frames->
tiling = VK_IMAGE_TILING_OPTIMAL;
909 vk_frames->
usage = VK_IMAGE_USAGE_STORAGE_BIT |
910 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
911 vk_frames->
img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
931 (uint32_t []) { 32, 32, 1 }, 0);
934 VK_SHADER_STAGE_COMPUTE_BIT);
938 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
939 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
946 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
947 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
950 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
951 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
975 (uint32_t []) { wg_x, 1, 1 }, 0);
978 VK_SHADER_STAGE_COMPUTE_BIT);
982 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
983 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
990 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
991 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
994 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
995 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
999 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1000 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1022 (uint32_t []) { 32, 32, 1 }, 0);
1025 VK_SHADER_STAGE_COMPUTE_BIT);
1029 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1030 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1037 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1038 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1041 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1042 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1046 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1047 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1069 (uint32_t []) { 1, 1, 1 }, 0);
1072 VK_SHADER_STAGE_COMPUTE_BIT);
1076 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1077 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1084 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1085 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1088 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1089 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1110 int wg_dim =
FFMIN(fv->
s.
props.properties.limits.maxComputeWorkGroupSize[0], 1024);
1113 (uint32_t []) { wg_dim, 1, 1 }, 0);
1116 VK_SHADER_STAGE_COMPUTE_BIT);
1120 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1121 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1128 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1129 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1132 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1133 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1162 (uint32_t []) { wg_x, 1, 1 }, 0);
1165 VK_SHADER_STAGE_COMPUTE_BIT);
1169 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1170 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1173 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1174 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1177 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1178 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1185 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1186 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1189 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1190 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1193 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1194 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1197 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1198 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1202 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1203 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1206 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1207 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1211 4 + fv->
is_rgb + !!
f->remap_mode, 0, 0);
1213 if (
f->remap_mode) {
1251 size_t maxsize, max_heap_size, max_host_size;
1273 if (
f->bits_per_raw_sample > (
f->version > 3 ? 16 : 8)) {
1276 "forcing range coder\n");
1281 if (
f->version < 4 && avctx->
gop_size > 1) {
1292 if (
f->version == 4)
1293 f->micro_version =
f->remap_mode ? 9 : 3;
1298 if (
f->num_h_slices <= 0 &&
f->num_v_slices <= 0) {
1304 f->num_h_slices = 32;
1305 f->num_v_slices = 32;
1307 }
else if (
f->num_h_slices &&
f->num_v_slices <= 0) {
1309 }
else if (
f->num_v_slices &&
f->num_h_slices <= 0) {
1313 f->num_h_slices =
FFMIN(
f->num_h_slices, avctx->
width);
1318 "by the standard is %i\n",
1323 f->max_slice_count =
f->num_h_slices *
f->num_v_slices;
1328 if (
f->version < 4) {
1329 if (((
f->chroma_h_shift > 0) && (avctx->
width % (64 <<
f->chroma_h_shift))) ||
1330 ((
f->chroma_v_shift > 0) && (avctx->
height % (64 <<
f->chroma_v_shift)))) {
1332 "dimensions is only supported in version 4 (-level 4)\n");
1338 if (
f->version < 4) {
1361 for (
int i = 0;
i < fv->
s.
mprops.memoryHeapCount;
i++) {
1362 if (fv->
s.
mprops.memoryHeaps[
i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)
1365 if (!(fv->
s.
mprops.memoryHeaps[
i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT))
1366 max_host_size =
FFMAX(max_host_size,
1372 if (maxsize > fv->
s.
props_11.maxMemoryAllocationSize) {
1374 "than maximum device allocation (%zu), clipping\n",
1375 maxsize, fv->
s.
props_11.maxMemoryAllocationSize);
1376 maxsize = fv->
s.
props_11.maxMemoryAllocationSize;
1379 if (max_heap_size < maxsize) {
1381 "using host memory (slower)\n",
1385 max_heap_size = max_host_size - (max_host_size >> 1);
1388 max_heap_size = max_heap_size - (max_heap_size >> 3);
1391 av_log(avctx,
AV_LOG_INFO,
"Async buffers: %zuMiB per context, %zuMiB total, depth: %i\n",
1392 maxsize / (1024*1024),
1426 uint32_t mw = (avctx->
width +
f->num_h_slices - 1) /
f->num_h_slices;
1427 uint32_t
mh = (avctx->
height +
f->num_v_slices - 1) /
f->num_v_slices;
1430 uint32_t pn = mw*
mh;
1458 if (
f->remap_mode) {
1493 &fv->
setup, 0, 0, 0,
1495 256*
sizeof(uint32_t), 512*
sizeof(uint8_t),
1496 VK_FORMAT_UNDEFINED));
1502 256*
sizeof(uint32_t), 512*
sizeof(uint8_t),
1503 VK_FORMAT_UNDEFINED));
1507 256*
sizeof(uint32_t) + 512*
sizeof(uint8_t),
1509 VK_FORMAT_UNDEFINED));
1513 0, 256*
sizeof(uint32_t),
1514 VK_FORMAT_UNDEFINED));
1537 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
1538 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1539 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
/* Byte offset of member x inside VulkanEncodeFFv1Context; used by the option
 * table entries to locate the field each option writes to
 * (e.g. OFFSET(optimize_rct)). */
1588 #define OFFSET(x) offsetof(VulkanEncodeFFv1Context, x)
/* Flag set attached to the option table entries: video parameter combined
 * with encoding parameter. The expansion is not parenthesized, so it should
 * only be used as a standalone initializer field (as the entries that follow
 * do), never inside a larger expression. */
1589 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1592 { .i64 = -1 }, -1, 2,
VE },
1594 { .i64 = 0 }, 0, 1,
VE },
1604 { .i64 = -1 }, -1, 2,
VE , .unit =
"qtable"},
1608 { .i64 =
QTABLE_8BIT }, INT_MIN, INT_MAX,
VE, .unit =
"qtable" },
1618 { .i64 = 0 }, 0, 1,
VE },
1620 {
"rct_search",
"Run a search for RCT parameters (level 4 only)",
OFFSET(optimize_rct),
AV_OPT_TYPE_BOOL,
1621 { .i64 = 1 }, 0, 1,
VE },
1624 { .i64 = 1 }, 1, INT_MAX,
VE },
1627 { .i64 = -1 }, -1, 2,
VE, .unit =
"remap_mode" },
1629 { .i64 = -1 }, INT_MIN, INT_MAX,
VE, .unit =
"remap_mode" },
1631 { .i64 = 0 }, INT_MIN, INT_MAX,
VE, .unit =
"remap_mode" },
1633 { .i64 = 1 }, INT_MIN, INT_MAX,
VE, .unit =
"remap_mode" },
1635 { .i64 = 2 }, INT_MIN, INT_MAX,
VE, .unit =
"remap_mode" },
1658 .
p.
name =
"ffv1_vulkan",
1676 .p.wrapper_name =
"vulkan",