#include <drm_fourcc.h>
#include <va/va_drmcommon.h>

#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
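/* Physical-device property structs cached once so limits (alignments,
 * maximum image size) can be read without repeated
 * vkGetPhysicalDevice*Properties calls. */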
VkPhysicalDeviceProperties2 props;
VkPhysicalDeviceMemoryProperties mprops;
VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;
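/* Picks how many queues to use for a given purpose: compute falls back to
 * the graphics queue count, and transfer falls back to compute and then
 * graphics when the device exposes no dedicated family. */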
#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) (                   \
    graph ?  hwctx->nb_graphics_queues :                            \
    comp  ? (hwctx->nb_comp_queues ?                                \
             hwctx->nb_comp_queues : hwctx->nb_graphics_queues) :   \
    tx    ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues :            \
             (hwctx->nb_comp_queues ?                               \
              hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \
#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name)           \
                                              vkGetInstanceProcAddr(inst, #name)
#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      |                 \
                             VK_IMAGE_USAGE_STORAGE_BIT      |                 \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT |                 \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)
#define ADD_VAL_TO_LIST(list, count, val)                                      \
        list = av_realloc_array(list, sizeof(*list), ++count);                 \
            err = AVERROR(ENOMEM);                                             \
        list[count - 1] = av_strdup(val);                                      \
        if (!list[count - 1]) {                                                \
            err = AVERROR(ENOMEM);                                             \
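/* Per-plane Vulkan formats for the supported AVPixelFormats: planar YUV is
 * represented as one single-channel R8/R16 image per plane. */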
static const struct {
    { AV_PIX_FMT_YUV420P,    { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { AV_PIX_FMT_YUV422P,    { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { AV_PIX_FMT_YUV444P,    { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { AV_PIX_FMT_YUVA420P,   { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA422P,   { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA444P,   { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
    { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GBRAP,      { VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM   } },
    { AV_PIX_FMT_GBRAP16,    { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM  } },
    { AV_PIX_FMT_GBRPF32,    { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
    { AV_PIX_FMT_GBRAPF32,   { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
VkFormatFeatureFlags flags;
VkFormatProperties2 prop = {
    .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
flags = linear ? prop.formatProperties.linearTilingFeatures :
                 prop.formatProperties.optimalTilingFeatures;
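/* Maps a VkResult to its enumerator name for log output. */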
#define CASE(VAL) case VAL: return #VAL
    CASE(VK_EVENT_RESET);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
VkDebugUtilsMessageTypeFlagsEXT messageType,
const VkDebugUtilsMessengerCallbackDataEXT *data,
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
for (int i = 0; i < data->cmdBufLabelCount; i++)
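/* Builds the list of extension names to enable: optional extensions are
 * checked against what the instance/device actually supports, and any
 * user-requested extensions are appended the same way. */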
const char * const **dst, uint32_t *num, int debug)
const char **extension_names = NULL;
int err = 0, found, extensions_found = 0;
int optional_exts_num;
uint32_t sup_ext_count;
char *user_exts_str = NULL;
VkExtensionProperties *sup_ext;
if (!user_exts_str) {
vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
if (!user_exts_str) {
vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                     &sup_ext_count, NULL);
sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                     &sup_ext_count, sup_ext);
for (int i = 0; i < optional_exts_num; i++) {
    tstr = optional_exts[i].name;
    for (int j = 0; j < sup_ext_count; j++) {
        if (!strcmp(tstr, sup_ext[j].extensionName)) {
tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
for (int j = 0; j < sup_ext_count; j++) {
    if (!strcmp(tstr, sup_ext[j].extensionName)) {
char *save, *token = av_strtok(user_exts_str, "+", &save);
for (int j = 0; j < sup_ext_count; j++) {
    if (!strcmp(token, sup_ext[j].extensionName)) {
*dst = extension_names;
*num = extensions_found;
for (int i = 0; i < extensions_found; i++)
    av_free((void *)extension_names[i]);
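/* Instance creation: fills VkApplicationInfo/VkInstanceCreateInfo, and when
 * the "debug" device option is set, enables the Khronos validation layer and
 * installs a debug-utils messenger that forwards messages to av_log(). */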
const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
VkApplicationInfo application_info = {
    .sType       = VK_STRUCTURE_TYPE_APPLICATION_INFO,
    .pEngineName = "libavutil",
    .apiVersion  = VK_API_VERSION_1_1,
VkInstanceCreateInfo inst_props = {
    .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
    .pApplicationInfo = &application_info,
                       &inst_props.enabledExtensionCount, debug_mode);
static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
inst_props.ppEnabledLayerNames = layers;
ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);
if (ret != VK_SUCCESS) {
for (int i = 0; i < inst_props.enabledExtensionCount; i++)
    av_free((void *)inst_props.ppEnabledExtensionNames[i]);
av_free((void *)inst_props.ppEnabledExtensionNames);
VkDebugUtilsMessengerCreateInfoEXT dbg = {
    .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
    .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                       VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                       VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                       VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
    .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT     |
                   VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT  |
                   VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
default:                                     return "unknown";
int err = 0, choice = -1;
VkPhysicalDevice *devices = NULL;
VkPhysicalDeviceIDProperties *idp = NULL;
VkPhysicalDeviceProperties2 *prop = NULL;
ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
if (ret != VK_SUCCESS || !num) {
ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
if (ret != VK_SUCCESS) {
for (int i = 0; i < num; i++) {
    idp[i].sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
    prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    prop[i].pNext = &idp[i];
    vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
           prop[i].properties.deviceName,
           prop[i].properties.deviceID);
for (int i = 0; i < num; i++) {
    if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
} else if (select->name) {
    for (int i = 0; i < num; i++) {
        if (strstr(prop[i].properties.deviceName, select->name)) {
for (int i = 0; i < num; i++) {
    if (select->pci_device == prop[i].properties.deviceID) {
for (int i = 0; i < num; i++) {
    if (select->vendor_id == prop[i].properties.vendorID) {
if (select->index < num) {
    choice = select->index;
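/* Queue family selection: prefer a dedicated compute family and a dedicated
 * transfer family, falling back to the graphics family when none exist. */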
VkQueueFamilyProperties *qs = NULL;
int graph_index = -1, comp_index = -1, tx_index = -1;
VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);
#define SEARCH_FLAGS(expr, out)                                                \
    for (int i = 0; i < num; i++) {                                            \
        const VkQueueFlagBits flags = qs[i].queueFlags;                        \
             (i != comp_index), tx_index)
#define ADD_QUEUE(fidx, graph, comp, tx)                                                  \
    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
           fidx, qs[fidx].queueCount, graph ? "graphics " : "",                           \
           comp ? "compute " : "", tx ? "transfers " : "");                               \
    av_log(ctx, AV_LOG_VERBOSE, " QF %i flags: %s%s%s%s\n", fidx,                         \
           ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "",          \
           ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "",            \
           ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "",         \
           ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "");     \
    pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx;                                 \
    pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount;                        \
    weights = av_malloc(qs[fidx].queueCount * sizeof(float));                             \
    pc[cd->queueCreateInfoCount].pQueuePriorities = weights;                              \
    for (int i = 0; i < qs[fidx].queueCount; i++)                                         \
    cd->queueCreateInfoCount++;
ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)
if (comp_index != -1) {
    ADD_QUEUE(comp_index, 0, 1, tx_index < 0)
if (tx_index != -1) {
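/* Execution context: one command pool for the chosen queue family plus one
 * primary command buffer, fence and VkQueue handle per queue. */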
int queue_family_index, int num_queues)
    .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
    .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
    .queueFamilyIndex = queue_family_index,
VkCommandBufferAllocateInfo cbuf_create = {
    .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
    .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
    .commandBufferCount = num_queues,
if (ret != VK_SUCCESS) {
cbuf_create.commandPool = cmd->pool;
ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
if (ret != VK_SUCCESS) {
for (int i = 0; i < num_queues; i++) {
    vkGetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
VkCommandBufferBeginInfo cmd_start = {
    .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
    .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
VkFenceCreateInfo fence_spawn = {
    .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
if (ret != VK_SUCCESS) {
vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
if (ret != VK_SUCCESS) {
if (!deps || !nb_deps)
for (int i = 0; i < nb_deps; i++) {
VkSubmitInfo *s_info, int synchronous)
if (ret != VK_SUCCESS) {
s_info->commandBufferCount = 1;
if (ret != VK_SUCCESS) {
vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
vkDestroyInstance(hwctx->inst, hwctx->alloc);
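/* Device creation: query the physical device's features, copy the ones that
 * are needed into the enabled-feature set, pick queue families, resolve the
 * extension list and finally create the VkDevice. */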
VkPhysicalDeviceFeatures dev_features = { 0 };
VkDeviceQueueCreateInfo queue_create_info[3] = {
    { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
    { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
    { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
VkDeviceCreateInfo dev_info = {
    .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
    .pQueueCreateInfos    = queue_create_info,
    .queueCreateInfoCount = 0,
hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
vkGetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features);
#define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME;
                          &dev_info.enabledExtensionCount, 0))) {
av_free((void *)queue_create_info[0].pQueuePriorities);
av_free((void *)queue_create_info[1].pQueuePriorities);
av_free((void *)queue_create_info[2].pQueuePriorities);

av_free((void *)queue_create_info[0].pQueuePriorities);
av_free((void *)queue_create_info[1].pQueuePriorities);
av_free((void *)queue_create_info[2].pQueuePriorities);
if (ret != VK_SUCCESS) {
for (int i = 0; i < dev_info.enabledExtensionCount; i++)
    av_free((void *)dev_info.ppEnabledExtensionNames[i]);
av_free((void *)dev_info.ppEnabledExtensionNames);
p->props.sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
       p->props.properties.deviceName);
       p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
       p->props.properties.limits.minMemoryMapAlignment);
       p->hprops.minImportedHostPointerAlignment);
vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
#define CHECK_QUEUE(type, n)                                                             \
    if (n >= queue_num) {                                                                \
        av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
               type, n, queue_num);                                                      \
        return AVERROR(EINVAL);                                                          \
vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
if (device && device[0]) {
    dev_select.index = strtol(device, &end, 10);
    if (end == device) {
        dev_select.index = 0;
        dev_select.name  = device;
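/* Device derivation: reuse what the source context knows about the hardware
 * (VAAPI vendor string, DRM PCI device ID, or CUDA device UUID) to select
 * the matching Vulkan device. */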
switch(src_ctx->type) {
const char *vendor = vaQueryVendorString(src_hwctx->display);
if (strstr(vendor, "Intel"))
    dev_select.vendor_id = 0x8086;
if (strstr(vendor, "AMD"))
    dev_select.vendor_id = 0x1002;
drmDevice *drm_dev_info;
int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
if (drm_dev_info->bustype == DRM_BUS_PCI)
    dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
drmFreeDevice(&drm_dev_info);
CudaFunctions *cu = cu_internal->cuda_dl;
int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
dev_select.has_uuid = 1;
const void *hwconfig,
constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
constraints->max_height = p->props.properties.limits.maxImageDimension2D;
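/* Generic memory allocator: walk the device's memory types, pick the first
 * one that satisfies both the resource's memoryTypeBits and the requested
 * property flags (and whose heap is large enough), then allocate. */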
VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
VkMemoryAllocateInfo alloc_info = {
    .sType          = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
    .pNext          = alloc_extension,
    .allocationSize = req->size,
for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
    const VkMemoryType *type = &p->mprops.memoryTypes[i];
    if (!(req->memoryTypeBits & (1 << i)))
    if ((type->propertyFlags & req_flags) != req_flags)
    if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
alloc_info.memoryTypeIndex = index;
ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
                       dev_hwctx->alloc, mem);
if (ret != VK_SUCCESS) {
*mem_flags |= p->mprops.memoryTypes[index].propertyFlags;
if (internal->cuda_fc_ref) {
    CudaFunctions *cu = cu_internal->cuda_dl;
    if (internal->cu_sem[i])
        CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
    if (internal->cu_mma[i])
        CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
    if (internal->ext_mem[i])
        CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
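/* Frame memory: each plane's VkImage gets its own allocation (dedicated when
 * the driver prefers it), and all planes are bound in one
 * vkBindImageMemory2() call. */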
void *alloc_pnext, size_t alloc_pnext_stride)
VkImageMemoryRequirementsInfo2 req_desc = {
    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
VkMemoryDedicatedAllocateInfo ded_alloc = {
    .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
    .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
VkMemoryDedicatedRequirements ded_req = {
    .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
VkMemoryRequirements2 req = {
    .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
if (f->tiling == VK_IMAGE_TILING_LINEAR)
    req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                          p->props.properties.limits.minMemoryMapAlignment);
use_ded_mem = ded_req.prefersDedicatedAllocation |
              ded_req.requiresDedicatedAllocation;
ded_alloc.image = f->img[i];
              f->tiling == VK_IMAGE_TILING_LINEAR ?
              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
              use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
              &f->flags, &f->mem[i])))
f->size[i] = req.memoryRequirements.size;
bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
bind_info[i].image  = f->img[i];
bind_info[i].memory = f->mem[i];
if (ret != VK_SUCCESS) {
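/* Frame preparation: records an image memory barrier per plane to move it to
 * the layout and access mask required for writing, reading or external
 * export, then submits and updates the cached layout/access. */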
VkImageLayout new_layout;
VkAccessFlags new_access;
VkSubmitInfo s_info = {
    .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
    .pSignalSemaphores    = frame->sem,
    .signalSemaphoreCount = planes,
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
dst_qf     = VK_QUEUE_FAMILY_IGNORED;
new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
new_access = VK_ACCESS_TRANSFER_READ_BIT;
dst_qf     = VK_QUEUE_FAMILY_IGNORED;
new_layout = VK_IMAGE_LAYOUT_GENERAL;
new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
s_info.pWaitSemaphores    = frame->sem;
s_info.pWaitDstStageMask  = wait_st;
s_info.waitSemaphoreCount = planes;
img_bar[i].sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
img_bar[i].srcAccessMask               = 0x0;
img_bar[i].dstAccessMask               = new_access;
img_bar[i].oldLayout                   = frame->layout[i];
img_bar[i].newLayout                   = new_layout;
img_bar[i].srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED;
img_bar[i].dstQueueFamilyIndex         = dst_qf;
img_bar[i].image                       = frame->img[i];
img_bar[i].subresourceRange.levelCount = 1;
img_bar[i].subresourceRange.layerCount = 1;
img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
frame->layout[i] = img_bar[i].newLayout;
frame->access[i] = img_bar[i].dstAccessMask;
                     VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                     VK_PIPELINE_STAGE_TRANSFER_BIT,
int frame_w, int frame_h, int plane)
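/* Image creation: one 2D VkImage per plane (plus an exportable semaphore
 * each), sized via get_plane_wh() and shared across the enabled queue
 * families when more than one is in use. */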
VkImageTiling tiling, VkImageUsageFlagBits usage,
VkExportSemaphoreCreateInfo ext_sem_info = {
    .sType       = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
    .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
VkSemaphoreCreateInfo sem_spawn = {
    .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
VkImageCreateInfo create_info = {
    .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
    .pNext                 = create_pnext,
    .imageType             = VK_IMAGE_TYPE_2D,
    .format                = img_fmts[i],
    .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
    .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
    .samples               = VK_SAMPLE_COUNT_1_BIT,
    .pQueueFamilyIndices   = p->qfs,
    .queueFamilyIndexCount = p->num_qfs,
    .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                              VK_SHARING_MODE_EXCLUSIVE,
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
ret = vkCreateImage(hwctx->act_dev, &create_info,
if (ret != VK_SUCCESS) {
ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
if (ret != VK_SUCCESS) {
f->layout[i] = create_info.initialLayout;
VkExternalMemoryHandleTypeFlags *comp_handle_types,
VkExternalMemoryHandleTypeFlagBits *iexp,
VkExternalMemoryHandleTypeFlagBits exp)
VkExternalImageFormatProperties eprops = {
    .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
VkImageFormatProperties2 props = {
    .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
VkPhysicalDeviceExternalImageFormatInfo enext = {
    .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
VkPhysicalDeviceImageFormatInfo2 pinfo = {
    .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
    .pNext = !exp ? NULL : &enext,
    .type  = VK_IMAGE_TYPE_2D,
    .usage = hwctx->usage,
    .flags = VK_IMAGE_CREATE_ALIAS_BIT,
ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
if (ret == VK_SUCCESS) {
    *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
VkExternalMemoryHandleTypeFlags e = 0x0;
VkExternalMemoryImageCreateInfo eiinfo = {
    .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
                     VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
                     VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
eminfo[i].handleTypes = e;
                   eiinfo.handleTypes ? &eiinfo : NULL);
             VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
!(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
flush_ranges[i].memory = map->frame->mem[i];
flush_ranges[i].size   = VK_WHOLE_SIZE;
if (ret != VK_SUCCESS) {
vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
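/* CPU mapping: only linear, host-visible frames can be mapped. Each plane's
 * memory is mapped, non-coherent ranges are invalidated, and the plane's
 * linesize is taken from vkGetImageSubresourceLayout(). */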
int err, mapped_mem_count = 0;
if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
    !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
                  VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
if (ret != VK_SUCCESS) {
    !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
map_mem_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
map_mem_ranges[i].size   = VK_WHOLE_SIZE;
map_mem_ranges[i].memory = f->mem[i];
ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
if (ret != VK_SUCCESS) {
VkImageSubresource sub = {
    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
VkSubresourceLayout layout;
vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
for (int i = 0; i < mapped_mem_count; i++)
    vkUnmapMemory(hwctx->act_dev, f->mem[i]);
static const struct {
    uint32_t drm_fourcc;
} vulkan_drm_format_map[] = {
    { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
    { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
    { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },

static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
    if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
        return vulkan_drm_format_map[i].vk_format;
    return VK_FORMAT_UNDEFINED;
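/* DRM PRIME import: every layer of the descriptor becomes a VkImage created
 * with the layer's DRM format modifier, the dma-buf FDs are dup()ed and
 * imported as VkDeviceMemory, and each plane is bound at its offset. */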
int bind_counts = 0;
VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
for (int i = 0; i < desc->nb_layers; i++) {
    if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
               desc->layers[i].format);
f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
            desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
            VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
for (int i = 0; i < desc->nb_layers; i++) {
    VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
        .drmFormatModifier           = desc->objects[0].format_modifier,
        .drmFormatModifierPlaneCount = planes,
        .pPlaneLayouts               = (const VkSubresourceLayout *)&plane_data,
    VkExternalMemoryImageCreateInfo einfo = {
        .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
        .pNext       = has_modifiers ? &drm_info : NULL,
        .handleTypes = htype,
    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
    VkImageCreateInfo create_info = {
        .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .imageType             = VK_IMAGE_TYPE_2D,
        .format                = drm_to_vulkan_fmt(desc->layers[i].format),
        .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
        .tiling                = f->tiling,
        .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
        .usage                 = frames_hwctx->usage,
        .samples               = VK_SAMPLE_COUNT_1_BIT,
        .pQueueFamilyIndices   = p->qfs,
        .queueFamilyIndexCount = p->num_qfs,
        .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                  VK_SHARING_MODE_EXCLUSIVE,
    get_plane_wh(&create_info.extent.width, &create_info.extent.height,
    for (int j = 0; j < planes; j++) {
        plane_data[j].offset     = desc->layers[i].planes[j].offset;
        plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
        plane_data[j].size       = 0;
        plane_data[j].arrayPitch = 0;
        plane_data[j].depthPitch = 0;
    ret = vkCreateImage(hwctx->act_dev, &create_info,
    if (ret != VK_SUCCESS) {
    ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
    if (ret != VK_SUCCESS) {
    f->layout[i] = create_info.initialLayout;
for (int i = 0; i < desc->nb_objects; i++) {
    int use_ded_mem = 0;
    VkMemoryFdPropertiesKHR fdmp = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
    VkMemoryRequirements req = {
        .size = desc->objects[i].size,
    VkImportMemoryFdInfoKHR idesc = {
        .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
        .handleType = htype,
        .fd         = dup(desc->objects[i].fd),
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
    if (ret != VK_SUCCESS) {
    req.memoryTypeBits = fdmp.memoryTypeBits;
    if (desc->nb_layers == desc->nb_objects) {
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        VkMemoryRequirements2 req2 = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        ded_alloc.image = f->img[i];
    err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                    use_ded_mem ? &ded_alloc : ded_alloc.pNext,
                    &f->flags, &f->mem[i]);
    f->size[i] = desc->objects[i].size;
for (int i = 0; i < desc->nb_layers; i++) {
    const int signal_p = has_modifiers && (planes > 1);
    for (int j = 0; j < planes; j++) {
        VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
                                       j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
                                                VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
        plane_info[bind_counts].sType       = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
        plane_info[bind_counts].planeAspect = aspect;
        bind_info[bind_counts].sType        = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[bind_counts].pNext        = signal_p ? &plane_info[bind_counts] : NULL;
        bind_info[bind_counts].image        = f->img[i];
        bind_info[bind_counts].memory       = f->mem[desc->layers[i].planes[j].object_index];
        bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
if (ret != VK_SUCCESS) {
for (int i = 0; i < desc->nb_layers; i++) {
for (int i = 0; i < desc->nb_objects; i++)
if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src)))
                            &vulkan_unmap_from, map);
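/* VAAPI mapping goes through DRM: the VA surface is synced and exported as a
 * PRIME descriptor, which is then imported by the DRM path above. */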
VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
vaSyncSurface(vaapi_ctx->display, surface_id);
err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
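/* CUDA interop: each plane's memory and semaphore is exported from Vulkan as
 * an opaque FD and imported into CUDA as external memory (mapped to a
 * mipmapped array) and an external semaphore. */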
CudaFunctions *cu = cu_internal->cuda_dl;
CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
                                                 CU_AD_FORMAT_UNSIGNED_INT8;
if (!dst_int || !dst_int->cuda_fc_ref) {
if (!dst_int->cuda_fc_ref) {
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
    .NumChannels = 1 + ((planes == 2) && i),
CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
    .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
    .size = dst_f->size[i],
VkMemoryGetFdInfoKHR export_info = {
    .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
    .memory     = dst_f->mem[i],
    .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
VkSemaphoreGetFdInfoKHR sem_export = {
    .sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
    .semaphore  = dst_f->sem[i],
    .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
    .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
tex_desc.arrayDesc.Width  = p_w;
tex_desc.arrayDesc.Height = p_h;
ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
                           &ext_desc.handle.fd);
if (ret != VK_SUCCESS) {
ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
                                                           dst_int->ext_mem[i],
ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
                                            dst_int->cu_mma[i], 0));
ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
                              &ext_sem_desc.handle.fd);
if (ret != VK_SUCCESS) {
ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
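/* CUDA -> Vulkan copy: wait on the per-plane external semaphores, copy every
 * plane with cuMemcpy2DAsync() into the mapped CUDA arrays, then signal the
 * semaphores again on the same stream. */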
CudaFunctions *cu = cu_internal->cuda_dl;
ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
ret = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                 planes, cuda_dev->stream));
CUDA_MEMCPY2D cpy = {
    .srcMemoryType = CU_MEMORYTYPE_DEVICE,
    .srcDevice     = (CUdeviceptr)src->data[i],
    .srcPitch      = src->linesize[i],
    .dstMemoryType = CU_MEMORYTYPE_ARRAY,
    .dstArray      = dst_int->cu_array[i],
cpy.WidthInBytes = p_w * desc->comp[i].step;
ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                   planes, cuda_dev->stream));
switch (src->format) {
return vulkan_map_from_vaapi(hwfc, dst, src, flags);
return vulkan_map_from_drm(hwfc, dst, src, flags);
typedef struct VulkanDRMMapping {
static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
    if (vulkan_drm_format_map[i].vk_format == vkfmt)
        return vulkan_drm_format_map[i].drm_fourcc;
    return DRM_FORMAT_INVALID;
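/* Export to DRM: query the image's DRM format modifier, export each plane's
 * memory as a dma-buf FD and fill in the AVDRMFrameDescriptor layers using
 * the subresource layout of each plane. */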
VkImageDrmFormatModifierPropertiesEXT drm_mod = {
    .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
if (ret != VK_SUCCESS) {
for (int i = 0; (i < planes) && (f->mem[i]); i++) {
    VkMemoryGetFdInfoKHR export_info = {
        .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
        .memory     = f->mem[i],
        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
    if (ret != VK_SUCCESS) {
VkSubresourceLayout layout;
VkImageSubresource sub = {
                  VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
                  VK_IMAGE_ASPECT_COLOR_BIT,
drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt);
if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
return vulkan_map_to_drm(hwfc, dst, src, flags);
return vulkan_map_to_vaapi(hwfc, dst, src, flags);
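/* Staging buffers: helper that creates a VkBuffer with the requested usage
 * and property flags, optionally importing host memory, and allocates
 * (dedicated, if preferred) memory for it. */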
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
size_t size, uint32_t req_memory_bits, int host_mapped,
void *create_pnext, void *alloc_pnext)
VkBufferCreateInfo buf_spawn = {
    .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
    .pNext       = create_pnext,
    .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
VkBufferMemoryRequirementsInfo2 req_desc = {
    .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
VkMemoryDedicatedAllocateInfo ded_alloc = {
    .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
    .pNext = alloc_pnext,
VkMemoryDedicatedRequirements ded_req = {
    .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
VkMemoryRequirements2 req = {
    .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
if (ret != VK_SUCCESS) {
req_desc.buffer = vkbuf->buf;
vkGetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
use_ded_mem = ded_req.prefersDedicatedAllocation |
              ded_req.requiresDedicatedAllocation;
ded_alloc.buffer = vkbuf->buf;
if (req_memory_bits)
    req.memoryRequirements.memoryTypeBits &= req_memory_bits;
                use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
if (ret != VK_SUCCESS) {
int nb_buffers, int invalidate)
int invalidate_count = 0;
for (int i = 0; i < nb_buffers; i++) {
                      VK_WHOLE_SIZE, 0, (void **)&mem[i]);
    if (ret != VK_SUCCESS) {
for (int i = 0; i < nb_buffers; i++) {
    const VkMappedMemoryRange ival_buf = {
        .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
        .memory = vkbuf->mem,
        .size   = VK_WHOLE_SIZE,
    if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
    invalidate_ctx[invalidate_count++] = ival_buf;
if (invalidate_count) {
    ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
    if (ret != VK_SUCCESS)
int nb_buffers, int flush)
int flush_count = 0;
for (int i = 0; i < nb_buffers; i++) {
    const VkMappedMemoryRange flush_buf = {
        .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
        .memory = vkbuf->mem,
        .size   = VK_WHOLE_SIZE,
    if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
    flush_ctx[flush_count++] = flush_buf;
ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
if (ret != VK_SUCCESS) {
for (int i = 0; i < nb_buffers; i++) {
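/* Image <-> buffer transfer: transition the planes that need it to the
 * transfer src/dst layout with a single pipeline barrier, then record a
 * vkCmdCopyImageToBuffer/vkCmdCopyBufferToImage per plane and submit. */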
const int *buf_stride, int w,
VkSubmitInfo s_info = {
    .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
    .pSignalSemaphores    = frame->sem,
    .pWaitSemaphores      = frame->sem,
    .pWaitDstStageMask    = sem_wait_dst,
    .signalSemaphoreCount = planes,
    .waitSemaphoreCount   = planes,
VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
                                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
                                    VK_ACCESS_TRANSFER_WRITE_BIT;
sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
img_bar[bar_num].sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
img_bar[bar_num].srcAccessMask               = 0x0;
img_bar[bar_num].dstAccessMask               = new_access;
img_bar[bar_num].oldLayout                   = frame->layout[i];
img_bar[bar_num].newLayout                   = new_layout;
img_bar[bar_num].srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED;
img_bar[bar_num].dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED;
img_bar[bar_num].image                       = frame->img[i];
img_bar[bar_num].subresourceRange.levelCount = 1;
img_bar[bar_num].subresourceRange.layerCount = 1;
img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
frame->layout[i] = img_bar[bar_num].newLayout;
frame->access[i] = img_bar[bar_num].dstAccessMask;
vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                     VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                     0, NULL, 0, NULL, bar_num, img_bar);
VkBufferImageCopy buf_reg = {
    .bufferOffset                = buf_offsets[i],
    .bufferRowLength             = buf_stride[i] / desc->comp[i].step,
    .imageSubresource.layerCount = 1,
    .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
    .imageOffset                 = { 0, 0, 0, },
buf_reg.bufferImageHeight = p_h;
buf_reg.imageExtent       = (VkExtent3D){ p_w, p_h, 1, };
vkCmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
                       vkbuf->buf, 1, &buf_reg);
vkCmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
                       frame->layout[i], 1, &buf_reg);
if (swf->width > hwfc->width || swf->height > hwfc->height)
if (f->tiling == VK_IMAGE_TILING_LINEAR &&
    f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
    map->format = swf->format;
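/* Software-frame transfer: when VK_EXT_external_memory_host is available and
 * the plane pointer/stride alignment allows it, the host buffer is imported
 * directly; otherwise the data goes through a staging buffer. */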
VkExternalMemoryBufferCreateInfo create_desc = {
    .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
    .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
VkImportMemoryHostPointerInfoEXT import_desc = {
    .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
    .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
VkMemoryHostPointerPropertiesEXT p_props = {
    .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
if (map_host && swf->linesize[i] > 0) {
    offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
    import_desc.pHostPointer = swf->data[i] - offs;
                       p->hprops.minImportedHostPointerAlignment);
    ret = pfn_vkGetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
                                                  import_desc.handleType,
                                                  import_desc.pHostPointer,
    if (ret == VK_SUCCESS) {
        buf_offsets[i] = offs;
if (!host_mapped[i])
                 from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
                        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                 req_size, p_props.memoryTypeBits, host_mapped[i],
                 host_mapped[i] ? &create_desc : NULL,
                 host_mapped[i] ? &import_desc : NULL);
get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
                (const uint8_t *)swf->data[i], swf->linesize[i],
                swf->width, swf->height, swf->format, from);
get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
switch (src->format) {
return vulkan_transfer_data_from_cuda(hwfc, dst, src);
if (src->hw_frames_ctx)
CudaFunctions *cu = cu_internal->cuda_dl;
ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                 planes, cuda_dev->stream));
CUDA_MEMCPY2D cpy = {
    .dstMemoryType = CU_MEMORYTYPE_DEVICE,
    .dstDevice     = (CUdeviceptr)dst->data[i],
    .srcMemoryType = CU_MEMORYTYPE_ARRAY,
    .srcArray      = dst_int->cu_array[i],
cpy.WidthInBytes = w * desc->comp[i].step;
ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                   planes, cuda_dev->stream));
return vulkan_transfer_data_to_cuda(hwfc, dst, src);