482 lines
20 KiB
C
482 lines
20 KiB
C
![]() |
// Tencent is pleased to support the open source community by making ncnn available.
|
||
|
//
|
||
|
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
|
||
|
//
|
||
|
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||
|
// in compliance with the License. You may obtain a copy of the License at
|
||
|
//
|
||
|
// https://opensource.org/licenses/BSD-3-Clause
|
||
|
//
|
||
|
// Unless required by applicable law or agreed to in writing, software distributed
|
||
|
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||
|
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||
|
// specific language governing permissions and limitations under the License.
|
||
|
|
||
|
#ifndef NCNN_GPU_H
|
||
|
#define NCNN_GPU_H
|
||
|
|
||
|
#include "platform.h"
|
||
|
|
||
|
#if NCNN_VULKAN
|
||
|
|
||
|
#include "mat.h"
|
||
|
|
||
|
namespace ncnn {
|
||
|
|
||
|
// instance
|
||
|
|
||
|
// Create VkInstance and initialize some objects that need to be calculated by GPU
|
||
|
// Creates a VkInstance object, Checks the extended attributes supported by the Vulkan instance concerned,
|
||
|
// Initializes, and creates Vulkan validation layers (if ENABLE_VALIDATION_LAYER is enabled),
|
||
|
// Iterates over all supported physical devices, etc.
|
||
|
NCNN_EXPORT int create_gpu_instance(const char* driver_path = 0);
|
||
|
|
||
|
// Get global VkInstance variable
|
||
|
// Must be called after create_gpu_instance() and before destroy_gpu_instance()
|
||
|
NCNN_EXPORT VkInstance get_gpu_instance();
|
||
|
|
||
|
// Destroy VkInstance object and free the memory of the associated object
|
||
|
// Usually called in the destructor of the main program exit
|
||
|
// The function will internally ensure that all vulkan devices are idle before proceeding with destruction.
|
||
|
NCNN_EXPORT void destroy_gpu_instance();
|
||
|
|
||
|
// vulkan core
|
||
|
extern PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers;
|
||
|
extern PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets;
|
||
|
extern PFN_vkAllocateMemory vkAllocateMemory;
|
||
|
extern PFN_vkBeginCommandBuffer vkBeginCommandBuffer;
|
||
|
extern PFN_vkBindBufferMemory vkBindBufferMemory;
|
||
|
extern PFN_vkBindImageMemory vkBindImageMemory;
|
||
|
extern PFN_vkCmdBeginQuery vkCmdBeginQuery;
|
||
|
extern PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets;
|
||
|
extern PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer;
|
||
|
extern PFN_vkCmdBindPipeline vkCmdBindPipeline;
|
||
|
extern PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
|
||
|
extern PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage;
|
||
|
extern PFN_vkCmdCopyImage vkCmdCopyImage;
|
||
|
extern PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer;
|
||
|
extern PFN_vkCmdCopyQueryPoolResults vkCmdCopyQueryPoolResults;
|
||
|
extern PFN_vkCmdDispatch vkCmdDispatch;
|
||
|
extern PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect;
|
||
|
extern PFN_vkCmdEndQuery vkCmdEndQuery;
|
||
|
extern PFN_vkCmdExecuteCommands vkCmdExecuteCommands;
|
||
|
extern PFN_vkCmdFillBuffer vkCmdFillBuffer;
|
||
|
extern PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
|
||
|
extern PFN_vkCmdPushConstants vkCmdPushConstants;
|
||
|
extern PFN_vkCmdResetQueryPool vkCmdResetQueryPool;
|
||
|
extern PFN_vkCmdResolveImage vkCmdResolveImage;
|
||
|
extern PFN_vkCmdUpdateBuffer vkCmdUpdateBuffer;
|
||
|
extern PFN_vkCmdWriteTimestamp vkCmdWriteTimestamp;
|
||
|
extern PFN_vkCreateBuffer vkCreateBuffer;
|
||
|
extern PFN_vkCreateBufferView vkCreateBufferView;
|
||
|
extern PFN_vkCreateCommandPool vkCreateCommandPool;
|
||
|
extern PFN_vkCreateComputePipelines vkCreateComputePipelines;
|
||
|
extern PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
|
||
|
extern PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
|
||
|
extern PFN_vkCreateDevice vkCreateDevice;
|
||
|
extern PFN_vkCreateFence vkCreateFence;
|
||
|
extern PFN_vkCreateImage vkCreateImage;
|
||
|
extern PFN_vkCreateImageView vkCreateImageView;
|
||
|
extern PFN_vkCreatePipelineCache vkCreatePipelineCache;
|
||
|
extern PFN_vkCreatePipelineLayout vkCreatePipelineLayout;
|
||
|
extern PFN_vkCreateQueryPool vkCreateQueryPool;
|
||
|
extern PFN_vkCreateSampler vkCreateSampler;
|
||
|
extern PFN_vkCreateSemaphore vkCreateSemaphore;
|
||
|
extern PFN_vkCreateShaderModule vkCreateShaderModule;
|
||
|
extern PFN_vkDestroyBuffer vkDestroyBuffer;
|
||
|
extern PFN_vkDestroyBufferView vkDestroyBufferView;
|
||
|
extern PFN_vkDestroyCommandPool vkDestroyCommandPool;
|
||
|
extern PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
|
||
|
extern PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
|
||
|
extern PFN_vkDestroyDevice vkDestroyDevice;
|
||
|
extern PFN_vkDestroyFence vkDestroyFence;
|
||
|
extern PFN_vkDestroyImage vkDestroyImage;
|
||
|
extern PFN_vkDestroyImageView vkDestroyImageView;
|
||
|
extern PFN_vkDestroyInstance vkDestroyInstance;
|
||
|
extern PFN_vkDestroyPipeline vkDestroyPipeline;
|
||
|
extern PFN_vkDestroyPipelineCache vkDestroyPipelineCache;
|
||
|
extern PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout;
|
||
|
extern PFN_vkDestroyQueryPool vkDestroyQueryPool;
|
||
|
extern PFN_vkDestroySampler vkDestroySampler;
|
||
|
extern PFN_vkDestroySemaphore vkDestroySemaphore;
|
||
|
extern PFN_vkDestroyShaderModule vkDestroyShaderModule;
|
||
|
extern PFN_vkDeviceWaitIdle vkDeviceWaitIdle;
|
||
|
extern PFN_vkEndCommandBuffer vkEndCommandBuffer;
|
||
|
extern PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties;
|
||
|
extern PFN_vkEnumerateDeviceLayerProperties vkEnumerateDeviceLayerProperties;
|
||
|
extern PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices;
|
||
|
extern PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges;
|
||
|
extern PFN_vkFreeCommandBuffers vkFreeCommandBuffers;
|
||
|
extern PFN_vkFreeDescriptorSets vkFreeDescriptorSets;
|
||
|
extern PFN_vkFreeMemory vkFreeMemory;
|
||
|
extern PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
|
||
|
extern PFN_vkGetDeviceMemoryCommitment vkGetDeviceMemoryCommitment;
|
||
|
extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr;
|
||
|
extern PFN_vkGetDeviceQueue vkGetDeviceQueue;
|
||
|
extern PFN_vkGetFenceStatus vkGetFenceStatus;
|
||
|
extern PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
|
||
|
extern PFN_vkGetImageSubresourceLayout vkGetImageSubresourceLayout;
|
||
|
extern PFN_vkGetPhysicalDeviceFeatures vkGetPhysicalDeviceFeatures;
|
||
|
extern PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties;
|
||
|
extern PFN_vkGetPhysicalDeviceImageFormatProperties vkGetPhysicalDeviceImageFormatProperties;
|
||
|
extern PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties;
|
||
|
extern PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties;
|
||
|
extern PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties;
|
||
|
extern PFN_vkGetPipelineCacheData vkGetPipelineCacheData;
|
||
|
extern PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
|
||
|
extern PFN_vkInvalidateMappedMemoryRanges vkInvalidateMappedMemoryRanges;
|
||
|
extern PFN_vkMapMemory vkMapMemory;
|
||
|
extern PFN_vkMergePipelineCaches vkMergePipelineCaches;
|
||
|
extern PFN_vkQueueSubmit vkQueueSubmit;
|
||
|
extern PFN_vkQueueWaitIdle vkQueueWaitIdle;
|
||
|
extern PFN_vkResetCommandBuffer vkResetCommandBuffer;
|
||
|
extern PFN_vkResetCommandPool vkResetCommandPool;
|
||
|
extern PFN_vkResetDescriptorPool vkResetDescriptorPool;
|
||
|
extern PFN_vkResetFences vkResetFences;
|
||
|
extern PFN_vkUnmapMemory vkUnmapMemory;
|
||
|
extern PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
|
||
|
extern PFN_vkWaitForFences vkWaitForFences;
|
||
|
|
||
|
// instance extension capability
|
||
|
extern int support_VK_KHR_external_memory_capabilities;
|
||
|
extern int support_VK_KHR_get_physical_device_properties2;
|
||
|
extern int support_VK_KHR_get_surface_capabilities2;
|
||
|
extern int support_VK_KHR_surface;
|
||
|
extern int support_VK_EXT_debug_utils;
|
||
|
extern int support_VK_EXT_validation_features;
|
||
|
extern int support_VK_EXT_validation_flags;
|
||
|
#if __ANDROID_API__ >= 26
|
||
|
extern int support_VK_KHR_android_surface;
|
||
|
#endif // __ANDROID_API__ >= 26
|
||
|
|
||
|
// VK_KHR_cooperative_matrix
|
||
|
extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR;
|
||
|
|
||
|
// VK_KHR_external_memory_capabilities
|
||
|
extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR;
|
||
|
|
||
|
// VK_KHR_get_physical_device_properties2
|
||
|
extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
|
||
|
extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
|
||
|
extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR;
|
||
|
extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR;
|
||
|
extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR;
|
||
|
extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR;
|
||
|
|
||
|
// VK_KHR_get_surface_capabilities2
|
||
|
extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR;
|
||
|
extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR;
|
||
|
|
||
|
// VK_KHR_surface
|
||
|
extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
|
||
|
extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR;
|
||
|
extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR;
|
||
|
extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR;
|
||
|
extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR;
|
||
|
|
||
|
#if __ANDROID_API__ >= 26
|
||
|
// VK_KHR_android_surface
|
||
|
extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR;
|
||
|
#endif // __ANDROID_API__ >= 26
|
||
|
|
||
|
// VK_NV_cooperative_matrix
|
||
|
extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV;
|
||
|
|
||
|
// get info
|
||
|
NCNN_EXPORT int get_gpu_count();
|
||
|
NCNN_EXPORT int get_default_gpu_index();
|
||
|
|
||
|
class GpuInfoPrivate;
|
||
|
class NCNN_EXPORT GpuInfo
|
||
|
{
|
||
|
public:
|
||
|
explicit GpuInfo();
|
||
|
virtual ~GpuInfo();
|
||
|
|
||
|
// vulkan physical device
|
||
|
VkPhysicalDevice physical_device() const;
|
||
|
|
||
|
// memory properties
|
||
|
const VkPhysicalDeviceMemoryProperties& physical_device_memory_properties() const;
|
||
|
|
||
|
// info
|
||
|
uint32_t api_version() const;
|
||
|
uint32_t driver_version() const;
|
||
|
uint32_t vendor_id() const;
|
||
|
uint32_t device_id() const;
|
||
|
const char* device_name() const;
|
||
|
uint8_t* pipeline_cache_uuid() const;
|
||
|
|
||
|
// 0 = discrete gpu
|
||
|
// 1 = integrated gpu
|
||
|
// 2 = virtual gpu
|
||
|
// 3 = cpu
|
||
|
int type() const;
|
||
|
|
||
|
// hardware limit
|
||
|
uint32_t max_shared_memory_size() const;
|
||
|
uint32_t max_workgroup_count_x() const;
|
||
|
uint32_t max_workgroup_count_y() const;
|
||
|
uint32_t max_workgroup_count_z() const;
|
||
|
uint32_t max_workgroup_invocations() const;
|
||
|
uint32_t max_workgroup_size_x() const;
|
||
|
uint32_t max_workgroup_size_y() const;
|
||
|
uint32_t max_workgroup_size_z() const;
|
||
|
size_t memory_map_alignment() const;
|
||
|
size_t buffer_offset_alignment() const;
|
||
|
size_t non_coherent_atom_size() const;
|
||
|
size_t buffer_image_granularity() const;
|
||
|
uint32_t max_image_dimension_1d() const;
|
||
|
uint32_t max_image_dimension_2d() const;
|
||
|
uint32_t max_image_dimension_3d() const;
|
||
|
float timestamp_period() const;
|
||
|
|
||
|
// runtime
|
||
|
uint32_t compute_queue_family_index() const;
|
||
|
uint32_t graphics_queue_family_index() const;
|
||
|
uint32_t transfer_queue_family_index() const;
|
||
|
|
||
|
uint32_t compute_queue_count() const;
|
||
|
uint32_t graphics_queue_count() const;
|
||
|
uint32_t transfer_queue_count() const;
|
||
|
|
||
|
// property
|
||
|
bool unified_compute_transfer_queue() const;
|
||
|
|
||
|
// subgroup
|
||
|
uint32_t subgroup_size() const;
|
||
|
bool support_subgroup_basic() const;
|
||
|
bool support_subgroup_vote() const;
|
||
|
bool support_subgroup_ballot() const;
|
||
|
bool support_subgroup_shuffle() const;
|
||
|
|
||
|
// bug is not feature
|
||
|
bool bug_storage_buffer_no_l1() const;
|
||
|
bool bug_corrupted_online_pipeline_cache() const;
|
||
|
bool bug_buffer_image_load_zero() const;
|
||
|
|
||
|
// but sometimes bug is a feature
|
||
|
bool bug_implicit_fp16_arithmetic() const;
|
||
|
|
||
|
// fp16 and int8 feature
|
||
|
bool support_fp16_packed() const;
|
||
|
bool support_fp16_storage() const;
|
||
|
bool support_fp16_uniform() const;
|
||
|
bool support_fp16_arithmetic() const;
|
||
|
bool support_int8_packed() const;
|
||
|
bool support_int8_storage() const;
|
||
|
bool support_int8_uniform() const;
|
||
|
bool support_int8_arithmetic() const;
|
||
|
|
||
|
// ycbcr conversion feature
|
||
|
bool support_ycbcr_conversion() const;
|
||
|
|
||
|
// cooperative matrix feature
|
||
|
bool support_cooperative_matrix() const;
|
||
|
bool support_cooperative_matrix_8_8_16() const;
|
||
|
bool support_cooperative_matrix_16_8_8() const;
|
||
|
bool support_cooperative_matrix_16_8_16() const;
|
||
|
bool support_cooperative_matrix_16_16_16() const;
|
||
|
|
||
|
// extension capability
|
||
|
int support_VK_KHR_8bit_storage() const;
|
||
|
int support_VK_KHR_16bit_storage() const;
|
||
|
int support_VK_KHR_bind_memory2() const;
|
||
|
int support_VK_KHR_buffer_device_address() const;
|
||
|
int support_VK_KHR_create_renderpass2() const;
|
||
|
int support_VK_KHR_cooperative_matrix() const;
|
||
|
int support_VK_KHR_dedicated_allocation() const;
|
||
|
int support_VK_KHR_descriptor_update_template() const;
|
||
|
int support_VK_KHR_external_memory() const;
|
||
|
int support_VK_KHR_get_memory_requirements2() const;
|
||
|
int support_VK_KHR_maintenance1() const;
|
||
|
int support_VK_KHR_maintenance2() const;
|
||
|
int support_VK_KHR_maintenance3() const;
|
||
|
int support_VK_KHR_multiview() const;
|
||
|
int support_VK_KHR_portability_subset() const;
|
||
|
int support_VK_KHR_push_descriptor() const;
|
||
|
int support_VK_KHR_sampler_ycbcr_conversion() const;
|
||
|
int support_VK_KHR_shader_float16_int8() const;
|
||
|
int support_VK_KHR_shader_float_controls() const;
|
||
|
int support_VK_KHR_storage_buffer_storage_class() const;
|
||
|
int support_VK_KHR_swapchain() const;
|
||
|
int support_VK_EXT_buffer_device_address() const;
|
||
|
int support_VK_EXT_descriptor_indexing() const;
|
||
|
int support_VK_EXT_memory_budget() const;
|
||
|
int support_VK_EXT_memory_priority() const;
|
||
|
int support_VK_EXT_queue_family_foreign() const;
|
||
|
int support_VK_AMD_device_coherent_memory() const;
|
||
|
#if __ANDROID_API__ >= 26
|
||
|
int support_VK_ANDROID_external_memory_android_hardware_buffer() const;
|
||
|
#endif // __ANDROID_API__ >= 26
|
||
|
int support_VK_NV_cooperative_matrix() const;
|
||
|
|
||
|
private:
|
||
|
GpuInfo(const GpuInfo&);
|
||
|
GpuInfo& operator=(const GpuInfo&);
|
||
|
|
||
|
private:
|
||
|
friend int create_gpu_instance(const char* driver_path);
|
||
|
GpuInfoPrivate* const d;
|
||
|
};
|
||
|
|
||
|
NCNN_EXPORT const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index());
|
||
|
|
||
|
class VkAllocator;
|
||
|
class VkCompute;
|
||
|
class Option;
|
||
|
class PipelineCache;
|
||
|
class VulkanDevicePrivate;
|
||
|
class NCNN_EXPORT VulkanDevice
|
||
|
{
|
||
|
public:
|
||
|
VulkanDevice(int device_index = get_default_gpu_index());
|
||
|
~VulkanDevice();
|
||
|
|
||
|
const GpuInfo& info;
|
||
|
|
||
|
VkDevice vkdevice() const;
|
||
|
|
||
|
VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const;
|
||
|
|
||
|
// with fixed workgroup size
|
||
|
VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const;
|
||
|
|
||
|
// helper for creating pipeline
|
||
|
int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const;
|
||
|
int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const;
|
||
|
int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, VkPipeline* pipeline) const;
|
||
|
int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const;
|
||
|
|
||
|
uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const;
|
||
|
bool is_mappable(uint32_t memory_type_index) const;
|
||
|
bool is_coherent(uint32_t memory_type_index) const;
|
||
|
|
||
|
VkQueue acquire_queue(uint32_t queue_family_index) const;
|
||
|
void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const;
|
||
|
|
||
|
// allocator on this device
|
||
|
VkAllocator* acquire_blob_allocator() const;
|
||
|
void reclaim_blob_allocator(VkAllocator* allocator) const;
|
||
|
|
||
|
VkAllocator* acquire_staging_allocator() const;
|
||
|
void reclaim_staging_allocator(VkAllocator* allocator) const;
|
||
|
|
||
|
// immutable sampler for texelfetch
|
||
|
const VkSampler* immutable_texelfetch_sampler() const;
|
||
|
|
||
|
// dummy buffer image
|
||
|
VkMat get_dummy_buffer() const;
|
||
|
VkImageMat get_dummy_image() const;
|
||
|
VkImageMat get_dummy_image_readonly() const;
|
||
|
|
||
|
// pipeline cache on this device
|
||
|
const PipelineCache* get_pipeline_cache() const;
|
||
|
|
||
|
// test image allocation
|
||
|
bool shape_support_image_storage(const Mat& shape) const;
|
||
|
|
||
|
// current gpu heap memory budget in MB
|
||
|
uint32_t get_heap_budget() const;
|
||
|
|
||
|
// utility operator
|
||
|
void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
|
||
|
void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
|
||
|
void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
|
||
|
void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
|
||
|
|
||
|
// VK_KHR_bind_memory2
|
||
|
PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR;
|
||
|
PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR;
|
||
|
|
||
|
// VK_KHR_buffer_device_address
|
||
|
PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR;
|
||
|
PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR;
|
||
|
PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR;
|
||
|
|
||
|
// VK_KHR_descriptor_update_template
|
||
|
PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
|
||
|
PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
|
||
|
PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
|
||
|
|
||
|
// VK_KHR_get_memory_requirements2
|
||
|
PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
|
||
|
PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR;
|
||
|
|
||
|
// VK_KHR_maintenance1
|
||
|
PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR;
|
||
|
|
||
|
// VK_KHR_maintenance3
|
||
|
PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR;
|
||
|
|
||
|
// VK_KHR_push_descriptor
|
||
|
PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR;
|
||
|
PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR;
|
||
|
|
||
|
// VK_KHR_sampler_ycbcr_conversion
|
||
|
PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR;
|
||
|
PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR;
|
||
|
|
||
|
// VK_KHR_swapchain
|
||
|
PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR;
|
||
|
PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR;
|
||
|
PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR;
|
||
|
PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR;
|
||
|
PFN_vkQueuePresentKHR vkQueuePresentKHR;
|
||
|
|
||
|
// VK_EXT_buffer_device_address
|
||
|
PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT;
|
||
|
|
||
|
#if __ANDROID_API__ >= 26
|
||
|
// VK_ANDROID_external_memory_android_hardware_buffer
|
||
|
PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID;
|
||
|
PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID;
|
||
|
#endif // __ANDROID_API__ >= 26
|
||
|
|
||
|
protected:
|
||
|
// device extension
|
||
|
int init_device_extension();
|
||
|
|
||
|
private:
|
||
|
VulkanDevice(const VulkanDevice&);
|
||
|
VulkanDevice& operator=(const VulkanDevice&);
|
||
|
|
||
|
private:
|
||
|
VulkanDevicePrivate* const d;
|
||
|
};
|
||
|
|
||
|
NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index());
|
||
|
|
||
|
// online spirv compilation
|
||
|
NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv);
|
||
|
NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv);
|
||
|
NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv);
|
||
|
|
||
|
// info from spirv
|
||
|
class NCNN_EXPORT ShaderInfo
|
||
|
{
|
||
|
public:
|
||
|
int specialization_count;
|
||
|
int binding_count;
|
||
|
int push_constant_count;
|
||
|
|
||
|
// 0 = null
|
||
|
// 1 = storage buffer
|
||
|
// 2 = storage image
|
||
|
// 3 = combined image sampler
|
||
|
int binding_types[16]; // 16 is large enough I think ...
|
||
|
|
||
|
int reserved_0;
|
||
|
int reserved_1;
|
||
|
int reserved_2;
|
||
|
int reserved_3;
|
||
|
};
|
||
|
|
||
|
NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info);
|
||
|
|
||
|
} // namespace ncnn
|
||
|
|
||
|
#endif // NCNN_VULKAN
|
||
|
|
||
|
#endif // NCNN_GPU_H
|