File QnnTFLiteDelegate.h¶
↰ Parent directory (include/QNN)
Contents
Definition (include/QNN/QnnTFLiteDelegate.h)¶
Includes¶
tensorflow/lite/c/common.h
Classes¶
Enums¶
Functions¶
Defines¶
Typedefs¶
Full File Listing¶
Defines
-
QNN_DELEGATE_CAPI_EXPORT¶
-
QNN_DELEGATE_API_VERSION_MAJOR 0¶
-
QNN_DELEGATE_API_VERSION_MINOR 24¶
-
QNN_DELEGATE_API_VERSION_PATCH 0¶
-
QNN_DELEGATE_GPU_OPTION_INIT { \
  kGpuFp16,    /*precision*/ \
  kGpuDefault, /*performance_mode*/ \
  ""           /*kernel_repo_dir*/ \
}
¶
-
QNN_DELEGATE_HTP_OPTION_INIT { \
  kHtpDefault,              /*performance_mode*/ \
  kHtpPerfCtrlManual,       /*perf_ctrl_strategy*/ \
  kHtpFp16,                 /*precision*/ \
  kHtpUnsignedPd,           /*pd_session*/ \
  kHtpOptimizeForInference, /*optimization_strategy*/ \
  true,                     /*useConvHmx*/ \
  false,                    /*useFoldRelu*/ \
  0,                        /*vtcm_size*/ \
  0,                        /*num_hvx_threads*/ \
  0,                        /*device_id*/ \
}
¶
-
QNN_DELEGATE_DSP_OPTION_INIT { \
  kDspDefault,        /*performance_mode*/ \
  kDspPerfCtrlManual, /*perf_ctrl_strategy*/ \
  kDspUnsignedPd,     /*pd_session*/ \
  kDspStatic,         /*encoding*/ \
}
¶
-
QNN_DELEGATE_IR_OPTION_INIT { \
  nullptr, /*output_path*/ \
}
¶
-
QNN_DELEGATE_OP_PACKAGE_OPTION_INIT { \
  0,       /*num_op_package_infos*/ \
  nullptr, /*op_package_infos*/ \
}
¶
-
QNN_DELEGATE_SKIP_OPTION_INIT { \
  nullptr, /*skip_delegate_ops*/ \
  0,       /*skip_delegate_ops_nr*/ \
  nullptr, /*skip_delegate_node_ids*/ \
  0,       /*skip_delegate_node_ids_nr*/ \
}
¶
-
QNN_DELEGATE_OPTION_INIT { \
  kUndefinedBackend,                   /*backend_type*/ \
  "",                                  /*library_path*/ \
  "",                                  /*skel_library_dir*/ \
  QNN_DELEGATE_GPU_OPTION_INIT,        /*gpu_options*/ \
  QNN_DELEGATE_HTP_OPTION_INIT,        /*htp_options*/ \
  QNN_DELEGATE_DSP_OPTION_INIT,        /*dsp_options*/ \
  QNN_DELEGATE_IR_OPTION_INIT,         /*ir_options*/ \
  kLogOff,                             /*log_level*/ \
  kProfilingOff,                       /*profiling*/ \
  QNN_DELEGATE_OP_PACKAGE_OPTION_INIT, /*op_package_options*/ \
  "",                                  /*tensor_dump_output_path*/ \
  "",                                  /*cache_dir*/ \
  "",                                  /*model_token*/ \
  QNN_DELEGATE_SKIP_OPTION_INIT,       /*skip_options*/ \
  kQnnPriorityDefault,                 /*graph_priority*/ \
}
¶
-
TfLiteQnnDelegateCapabilitySupported 1¶
Returned by TfLiteQnnDelegateHasCapability() if the capability is supported.
-
TfLiteQnnDelegateCapabilityNotSupported 0¶
Returned by TfLiteQnnDelegateHasCapability() if the capability is not supported.
Typedefs
-
typedef enum TfLiteQnnDelegateBackendType TfLiteQnnDelegateBackendType
The QNN backend used to delegate the model’s nodes. Each backend has its own set of supported ops and tensor types.
-
typedef enum TfLiteQnnDelegateLogLevel TfLiteQnnDelegateLogLevel
Logging level of the delegate and QNN backend.
-
typedef enum TfLiteQnnDelegateGraphPriority TfLiteQnnDelegateGraphPriority
Options to set Graph Priority. This is directly mapped to Qnn_Priority_t. Please refer to QNN SDK for additional information.
-
typedef enum TfLiteQnnDelegateProfilingOptions TfLiteQnnDelegateProfilingOptions
Options to profile the QNN Delegate execution.
-
typedef enum TfLiteQnnDelegateGpuPrecision TfLiteQnnDelegateGpuPrecision
Defines the optimization levels of the graph tensors that are not input nor output tensors. This enum controls the trade-off between performance and accuracy.
-
typedef enum TfLiteQnnDelegateGpuPerformanceMode TfLiteQnnDelegateGpuPerformanceMode
Defines performance modes available for GPU backend.
-
typedef enum TfLiteQnnDelegateHtpPerformanceMode TfLiteQnnDelegateHtpPerformanceMode
Defines performance modes available for HTP backend.
-
typedef enum TfLiteQnnDelegateDspPerformanceMode TfLiteQnnDelegateDspPerformanceMode
Defines performance modes available for DSP backend.
-
typedef enum TfLiteQnnDelegateHtpPerfCtrlStrategy TfLiteQnnDelegateHtpPerfCtrlStrategy
Defines performance control strategy
Manual: The performance mode is voted as the backend is initialized, and released at the moment the backend is destroyed.
Users can control the vote/release of the performance mode by TfLiteQnnDelegateSetPerf().
Note that this is the default strategy.
For example, users can vote before inference starts, and release after all invocations are complete.
TfLiteQnnDelegateSetPerf(delegate, kPerformanceVote); // invoke inferences... TfLiteQnnDelegateSetPerf(delegate, kPerformanceRelease);
AUTO: QNN Delegate votes before starting inference, and releases after an idle interval.
-
typedef enum TfLiteQnnDelegateDspPerfCtrlStrategy TfLiteQnnDelegateDspPerfCtrlStrategy
Defines DSP performance control strategy. Similar to HTP cases.
-
typedef enum TfLiteQnnDelegateDspPdSession TfLiteQnnDelegateDspPdSession
Defines pd sessions available for DSP backend.
-
typedef enum TfLiteQnnDelegateDspEncoding TfLiteQnnDelegateDspEncoding
Defines encoding for DSP backend. Dynamic encoding is more precise but sacrifices a bit of performance.
-
typedef enum TfLiteQnnDelegateHtpPdSession TfLiteQnnDelegateHtpPdSession
Defines pd sessions available for HTP backend.
-
typedef enum TfLiteQnnDelegateHtpPrecision TfLiteQnnDelegateHtpPrecision
Defines the optimization levels of the graph tensors that are not input nor output tensors. This enum controls the trade-off between performance and accuracy.
-
typedef enum TfLiteQnnDelegateHtpOptimizationStrategy TfLiteQnnDelegateHtpOptimizationStrategy
Defines the optimization strategy used by the HTP backend. kHtpOptimizeForInference will have longer preparation time, but more optimal graph. kHtpOptimizeForPrepare will have shorter preparation time, but less optimal graph. kHtpOptimizeForInferenceO3 will take into account QNN_HTP_DEVICE_CONFIG_OPTION_SOC configuration when possible. When SOC information is taken into account, O3 configuration is expected to provide more optimal graph in most cases, but may result in less optimal graph in some cases. Please check HTP section in Qnn docs for more detail.
-
typedef enum TfLiteQnnDelegatePerformanceAction TfLiteQnnDelegatePerformanceAction
Defines the performance action used by TfLiteQnnDelegateSetPerf()
-
typedef int32_t TfLiteQnnDelegateCapabilityStatus
-
typedef enum TfLiteQnnDelegateCapability TfLiteQnnDelegateCapability
Defines possible QNN Delegate capabilities.
Enums
-
enum TfLiteQnnDelegateBackendType
The QNN backend used to delegate the model’s nodes. Each backend has its own set of supported ops and tensor types.
Values:
-
enumerator kUndefinedBackend = 0¶
-
enumerator kGpuBackend¶
Backend for AdrenoTM GPU hardware accelerator.
-
enumerator kHtpBackend¶
Backend for Hexagon HTP hardware accelerator.
-
enumerator kDspBackend¶
Backend for Hexagon DSP hardware accelerator.
-
enumerator kIrBackend¶
Backend for serializing model into dlc.
-
enumerator kUndefinedBackend = 0¶
-
enum TfLiteQnnDelegateLogLevel
Logging level of the delegate and QNN backend.
Values:
-
enumerator kLogOff = 0¶
Disable delegate and QNN backend logging messages.
-
enumerator kLogLevelError = 1¶
-
enumerator kLogLevelWarn = 2¶
-
enumerator kLogLevelInfo = 3¶
-
enumerator kLogLevelVerbose = 4¶
-
enumerator kLogLevelDebug = 5¶
-
enumerator kLogOff = 0¶
-
enum TfLiteQnnDelegateGraphPriority
Options to set Graph Priority. This is directly mapped to Qnn_Priority_t. Please refer to QNN SDK for additional information.
Values:
-
enumerator kQnnPriorityDefault = 0¶
-
enumerator kQnnPriorityLow¶
-
enumerator kQnnPriorityNormal¶
-
enumerator kQnnPriorityNormalHigh¶
-
enumerator kQnnPriorityHigh¶
-
enumerator kQnnPriorityUndefined¶
-
enumerator kQnnPriorityDefault = 0¶
-
enum TfLiteQnnDelegateProfilingOptions
Options to profile the QNN Delegate execution.
Values:
-
enumerator kProfilingOff = 0¶
-
enumerator kBasicProfiling¶
-
enumerator kPerOpProfiling¶
-
enumerator kLintingProfiling¶
-
enumerator kProfilingOff = 0¶
-
enum TfLiteQnnDelegateGpuPrecision
Defines the optimization levels of the graph tensors that are not input nor output tensors. This enum controls the trade-off between performance and accuracy.
Values:
-
enumerator kGpuUserProvided = 0¶
-
enumerator kGpuFp32¶
-
enumerator kGpuFp16¶
-
enumerator kGpuHybrid¶
-
enumerator kGpuUserProvided = 0¶
-
enum TfLiteQnnDelegateGpuPerformanceMode
Defines performance modes available for GPU backend.
Values:
-
enumerator kGpuDefault = 0¶
-
enumerator kGpuHigh¶
-
enumerator kGpuNormal¶
-
enumerator kGpuLow¶
-
enumerator kGpuDefault = 0¶
-
enum TfLiteQnnDelegateHtpPerformanceMode
Defines performance modes available for HTP backend.
Values:
-
enumerator kHtpDefault = 0¶
-
enumerator kHtpSustainedHighPerformance = 1¶
-
enumerator kHtpBurst = 2¶
-
enumerator kHtpHighPerformance = 3¶
-
enumerator kHtpPowerSaver = 4¶
-
enumerator kHtpLowPowerSaver = 5¶
-
enumerator kHtpHighPowerSaver = 6¶
-
enumerator kHtpLowBalanced = 7¶
-
enumerator kHtpBalanced = 8¶
-
enumerator kHtpExtremePowerSaver = 9¶
-
enumerator kHtpDefault = 0¶
-
enum TfLiteQnnDelegateDspPerformanceMode
Defines performance modes available for DSP backend.
Values:
-
enumerator kDspDefault = 0¶
-
enumerator kDspSustainedHighPerformance = 1¶
-
enumerator kDspBurst = 2¶
-
enumerator kDspHighPerformance = 3¶
-
enumerator kDspPowerSaver = 4¶
-
enumerator kDspLowPowerSaver = 5¶
-
enumerator kDspHighPowerSaver = 6¶
-
enumerator kDspLowBalanced = 7¶
-
enumerator kDspBalanced = 8¶
-
enumerator kDspDefault = 0¶
-
enum TfLiteQnnDelegateHtpPerfCtrlStrategy
Defines performance control strategy
Manual: The performance mode is voted as the backend is initialized, and released at the moment the backend is destroyed.
Users can control the vote/release of the performance mode by TfLiteQnnDelegateSetPerf().
Note that this is the default strategy.
For example, users can vote before inference starts, and release after all invocations are complete.
TfLiteQnnDelegateSetPerf(delegate, kPerformanceVote); // invoke inferences... TfLiteQnnDelegateSetPerf(delegate, kPerformanceRelease);
AUTO: QNN Delegate votes before starting inference, and releases after an idle interval.
Values:
-
enumerator kHtpPerfCtrlManual = 0¶
-
enumerator kHtpPerfCtrlAuto = 1¶
-
enumerator kHtpPerfCtrlManual = 0¶
-
enum TfLiteQnnDelegateDspPerfCtrlStrategy
Defines DSP performance control strategy. Similar to HTP cases.
Values:
-
enumerator kDspPerfCtrlManual = 0¶
-
enumerator kDspPerfCtrlAuto = 1¶
-
enumerator kDspPerfCtrlManual = 0¶
-
enum TfLiteQnnDelegateDspPdSession
Defines pd sessions available for DSP backend.
Values:
-
enumerator kDspUnsignedPd = 0¶
-
enumerator kDspSignedPd¶
-
enumerator kDspAdaptivePd¶
-
enumerator kDspUnsignedPd = 0¶
-
enum TfLiteQnnDelegateDspEncoding
Defines encoding for DSP backend. Dynamic encoding is more precise but sacrifices a bit of performance.
Values:
-
enumerator kDspStatic = 0¶
-
enumerator kDspDynamic = 1¶
-
enumerator kDspUnknown = 0x7fffffff¶
-
enumerator kDspStatic = 0¶
-
enum TfLiteQnnDelegateHtpPdSession
Defines pd sessions available for HTP backend.
Values:
-
enumerator kHtpUnsignedPd = 0¶
-
enumerator kHtpSignedPd¶
-
enumerator kHtpUnsignedPd = 0¶
-
enum TfLiteQnnDelegateHtpPrecision
Defines the optimization levels of the graph tensors that are not input nor output tensors. This enum controls the trade-off between performance and accuracy.
Values:
-
enumerator kHtpQuantized = 0¶
-
enumerator kHtpFp16¶
-
enumerator kHtpQuantized = 0¶
-
enum TfLiteQnnDelegateHtpOptimizationStrategy
Defines the optimization strategy used by the HTP backend. kHtpOptimizeForInference will have longer preparation time, but more optimal graph. kHtpOptimizeForPrepare will have shorter preparation time, but less optimal graph. kHtpOptimizeForInferenceO3 will take into account QNN_HTP_DEVICE_CONFIG_OPTION_SOC configuration when possible. When SOC information is taken into account, O3 configuration is expected to provide more optimal graph in most cases, but may result in less optimal graph in some cases. Please check HTP section in Qnn docs for more detail.
Values:
-
enumerator kHtpOptimizeForInference = 0¶
-
enumerator kHtpOptimizeForPrepare¶
-
enumerator kHtpOptimizeForInferenceO3¶
-
enumerator kHtpOptimizeForInference = 0¶
-
enum TfLiteQnnDelegatePerformanceAction
Defines the performance action used by TfLiteQnnDelegateSetPerf()
Values:
-
enumerator kPerformanceVote = 0¶
-
enumerator kPerformanceRelease = 1¶
-
enumerator kPerformanceVote = 0¶
Functions
-
TfLiteQnnDelegateOptions TfLiteQnnDelegateOptionsDefault()¶
Create the QNN Delegate options structure and populate with default values.
-
TfLiteDelegate *TfLiteQnnDelegateCreate(const TfLiteQnnDelegateOptions *options)¶
Create the QNN Delegate with the specified options.
-
void TfLiteQnnDelegateDelete(TfLiteDelegate *delegate)¶
Delete the QNN Delegate once no longer required.
Note that this is not a thread-safe function, which might cause unexpected behaviour when using it with TfLiteQnnDelegateSetPerf, TfLiteQnnDelegateUpdateHtpPerfMode, TfLiteQnnDelegateUpdateDspPerfMode, or TfLiteQnnDelegateDelete at the same time.
-
bool TfLiteQnnDelegateSetPerf(TfLiteDelegate *delegate, const TfLiteQnnDelegatePerformanceAction action)¶
Manually vote or release performance mode. “Vote” to request hardware to obey the performance mode setting as soon as possible. “Release” to release the vote. Note that this API only work for HTP/DSP backend with kHtpPerfCtrlManual or kDspPerfCtrlManual. Return true for success, false for failure.
-
TfLiteQnnDelegateCapabilityStatus TfLiteQnnDelegateHasCapability(const TfLiteQnnDelegateCapability cap)¶
Detect whether the capability is supported on the platform running QNN Delegate.
Note that this is an experimental feature.
-
bool TfLiteQnnDelegateUpdateHtpPerfMode(TfLiteDelegate *delegate, const TfLiteQnnDelegateHtpPerformanceMode mode)¶
This API changes the performance mode of a created QNN Delegate on HTP backend, returning
`true` for the mode set correctly, `false` for any failure. It will perform a vote after a successful update. If the strategy of performance controlling is manual, the new mode takes effect before this API returns.
Note that this API cannot be called during graph invocation, and this is an experimental feature.
-
bool TfLiteQnnDelegateUpdateDspPerfMode(TfLiteDelegate *delegate, const TfLiteQnnDelegateDspPerformanceMode mode)¶
This API changes the performance mode of a created QNN Delegate on DSP backend, returning
`true` for the mode set correctly, `false` for any failure.
-
QnnDelegateApiVersion TfLiteQnnDelegateGetApiVersion()¶
Get QNN Delegate API version.
-
void *TfLiteQnnDelegateAllocCustomMem(size_t bytes, size_t alignment)¶
Allocate specific tensors (usually graph inputs and outputs) on shared memory. Users are responsible to allocate “enough” tensor bytes, and set alignment as kDefaultTensorAlignment. The function returns a valid pointer if allocation is successful.
Note that this is an experimental feature.
-
void TfLiteQnnDelegateFreeCustomMem(void *buffer_ptr)¶
Free the allocated shared memory.
Note that this is an experimental feature.
-
TfLiteQnnDelegateProfilingResult TfLiteQnnDelegateGetProfilingResult(TfLiteDelegate *delegate)¶
Get profiling result.
-
void TfLiteQnnDelegateClearProfilingResult(TfLiteDelegate *delegate)¶
Free the recorded profiling result.
-
struct QnnDelegateApiVersion¶
- #include <QnnTFLiteDelegate.h>
A struct which is used to provide a version number using 3 values: major, minor, patch
-
struct TfLiteQnnDelegateGpuBackendOptions
- #include <QnnTFLiteDelegate.h>
Specifies the backend options for the GPU backend. To be used when selecting TfLiteQnnDelegateBackendType::kGpuBackend for the TfLiteQnnDelegateOptions::backend_type.
Public Members
-
TfLiteQnnDelegateGpuPrecision precision
The default precision is half float for the best performance.
-
TfLiteQnnDelegateGpuPerformanceMode performance_mode
The default performance mode is set to high.
-
const char *kernel_repo_dir
The QNN GPU backend supports on-disk kernel persistence strategies where compiled GPU kernel binaries are cached to disk and can be shared across models having the same kernels and improve warm init times significantly.
-
TfLiteQnnDelegateGpuPrecision precision
-
struct TfLiteQnnDelegateHtpBackendOptions
- #include <QnnTFLiteDelegate.h>
Specifies the backend options for the HTP backend. To be used when selecting TfLiteQnnDelegateBackendType::kHtpBackend for the TfLiteQnnDelegateOptions::backend_type.
Public Members
-
TfLiteQnnDelegateHtpPerformanceMode performance_mode
The default performance mode sets no configurations on the HTP.
-
TfLiteQnnDelegateHtpPerfCtrlStrategy perf_ctrl_strategy
The default performance control strategy is Manual.
-
TfLiteQnnDelegateHtpPrecision precision
The default precision mode supports quantized networks. Other precision modes may only be supported on certain SoCs.
-
TfLiteQnnDelegateHtpPdSession pd_session
Signed or unsigned HTP PD session. The default PD session is unsigned.
-
TfLiteQnnDelegateHtpOptimizationStrategy optimization_strategy
The default optimization strategy will optimize the graph for inference.
-
bool useConvHmx
When using short conv hmx, one might have better performance, but convolution that have short depth and/or weights that are not symmetric could exhibit inaccurate results.
-
bool useFoldRelu
When using fold relu, one might have better performance. This optimization is correct when quantization ranges for convolution are equal to or are subset of the Relu operation.
-
uint32_t vtcm_size
Option to set VTCM size in MB. This is directly mapped to QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE under QnnHtpGraph_ConfigOption_t. If VTCM size is set to 0, the default VTCM size will be used. If VTCM size is greater than VTCM size available for this device, it will be set to the maximum VTCM size for this device.
-
uint32_t num_hvx_threads
Option to set the number of HVX threads. This is directly mapped to QNN_HTP_GRAPH_CONFIG_OPTION_NUM_HVX_THREADS under QnnHtpGraph_ConfigOption_t. If this option is set to 0, the default number of HVX threads will be used. If the input exceeds the max number of HVX threads, the maximum number of threads supported will be used.
-
uint32_t device_id
Some SoCs come with more than 1 HTP device. You can set which HTP device you want to run the model on by this attribute. But in most cases, you can just use the default device_id.
-
TfLiteQnnDelegateHtpPerformanceMode performance_mode
-
struct TfLiteQnnDelegateDspBackendOptions
- #include <QnnTFLiteDelegate.h>
Specifies the backend options for the DSP backend. To be used when selecting kDspBackend as the <backend_type>.
Public Members
-
TfLiteQnnDelegateDspPerformanceMode performance_mode
The default performance mode sets no configurations on the DSP.
-
TfLiteQnnDelegateDspPerfCtrlStrategy perf_ctrl_strategy
The default performance control strategy is Manual.
-
TfLiteQnnDelegateDspPdSession pd_session
The default PD session is unsigned.
-
TfLiteQnnDelegateDspEncoding encoding
The default Encoding is static.
-
TfLiteQnnDelegateDspPerformanceMode performance_mode
-
struct TfLiteQnnDelegateIrBackendOptions
- #include <QnnTFLiteDelegate.h>
Specifies the backend options for the IR writer backend. To be used when selecting TfLiteQnnDelegateBackendType::kIrBackend for the TfLiteQnnDelegateOptions::backend_type.
Public Members
-
const char *output_path
-
const char *output_path
-
struct TfLiteQnnDelegateOpPackageOpMap
- #include <QnnTFLiteDelegate.h>
Map of TFLite custom operator name to op type defined within an op package.
Public Members
-
const char *custom_op_name
The TfLiteRegistration::custom_name set during registration.
-
const char *qnn_op_type_name
The corresponding op type name defined in the op package.
-
const char *custom_op_name
-
struct TfLiteQnnDelegateOpPackageInfo
- #include <QnnTFLiteDelegate.h>
Structure containing the information needed to register and use an op package with QNN.
Public Members
-
const char *op_package_name
The name of the op package.
-
const char *op_package_path
The path on disk to the op package library.
-
const char *interface_provider
The name of a function in the op package library which satisfies the QnnOpPackage_InterfaceProvider_t interface.
-
const char *target
The target which this op package library was compiled for.
-
int num_ops_map
Number of elements in the TfLiteQnnDelegateOpPackageInfo.ops_map array.
-
TfLiteQnnDelegateOpPackageOpMap *ops_map
An array of TfLiteQnnDelegateOpPackageOpMap structures.
-
const char *op_package_name
-
struct TfLiteQnnDelegateOpPackageOptions
- #include <QnnTFLiteDelegate.h>
Public Members
-
int num_op_package_infos
Number of elements in TfLiteQnnDelegateOpPackageOptions.op_package_infos array.
-
TfLiteQnnDelegateOpPackageInfo *op_package_infos
An array of TfLiteQnnDelegateOpPackageInfo structures.
-
int num_op_package_infos
-
struct TfLiteQnnDelegateSkipOption
- #include <QnnTFLiteDelegate.h>
Public Members
-
const int *skip_delegate_ops
Set ops not to be delegated manually based on the op id(s). To obtain all the op ids, please refer to tensorflow/lite/builtin_ops.h. Notice that we skip all ops with the types specified in the skip_delegate_ops array. For example, if you set skip to include SquaredDifference, all instances of SquaredDifference ops in the model will not be delegated.
-
uint32_t skip_delegate_ops_nr
Indicates the length of skip_delegate_ops array.
-
const int *skip_delegate_node_ids
Set node IDs not to be delegated. Node id can be obtained by node’s location information in .tflite.
-
uint32_t skip_delegate_node_ids_nr
Indicates the length of skip_delegate_node_ids array.
-
const int *skip_delegate_ops
-
struct TfLiteQnnDelegateOptions
- #include <QnnTFLiteDelegate.h>
Public Members
-
TfLiteQnnDelegateBackendType backend_type
The backend QNN library to open and execute the graph with. This is a required argument and will error out if kUndefinedBackend is supplied.
-
const char *library_path
Optional parameter to override the QNN backend library.
-
const char *skel_library_dir
Optional parameter specifying the directory of QNN Skel library. Only useful for backends which have a Skel library.
-
TfLiteQnnDelegateGpuBackendOptions gpu_options
Optional backend specific options for the GPU backend. Only used when selecting TfLiteQnnDelegateBackendType::kGpuBackend, otherwise will be ignored.
-
TfLiteQnnDelegateHtpBackendOptions htp_options
Optional backend specific options for the HTP backend. Only used when selecting TfLiteQnnDelegateBackendType::kHtpBackend, otherwise will be ignored.
-
TfLiteQnnDelegateDspBackendOptions dsp_options
Optional backend specific options for the DSP backend. Only used when selecting TfLiteQnnDelegateBackendType::kDspBackend, otherwise will be ignored.
-
TfLiteQnnDelegateIrBackendOptions ir_options
Optional backend specific options for the IR backend. Only used when selecting TfLiteQnnDelegateBackendType::kIrBackend, otherwise will be ignored.
-
TfLiteQnnDelegateLogLevel log_level
Logging level of the delegate and the backend. Default is off.
-
TfLiteQnnDelegateProfilingOptions profiling
Option to enable profiling with the delegate. Default is off.
-
TfLiteQnnDelegateOpPackageOptions op_package_options
Optional structure to specify op packages loaded and used by the backend.
-
const char *tensor_dump_output_path
Tensor dump output path. If a path is given, Delegate will write outputs of each OP there. We don’t recommend using this option. It exists only for debugging accuracy issues.
-
const char *cache_dir
Specifies the directory of a compiled model. Signals intent to either:
Save the model if the file doesn’t exist, or
Restore model from the file.
Model Cache specific options. Only used when setting model_token, otherwise will be ignored.
We don’t recommend that delegate instances with/without cache be mixed in same process, unless an instance without cache is initialized, invoked, and terminated before an instance with cache is used in order to make sure all resources are prepared correctly.
TfLiteDelegate* delegate_wo_cache = TfLiteQnnDelegateCreate(&options_wo_cache); interpreter_0->ModifyGraphWithDelegate(delegate_wo_cache); // Perform inference with interpreter_0 TfLiteQnnDelegateDelete(delegate_wo_cache); // after this, another delegate_with_cache can be used in the same // process, though not recommended at this moment. TfLiteDelegate* delegate_with_cache = TfLiteQnnDelegateCreate(&options_with_cache); // another interpreter interpreter_1->ModifyGraphWithDelegate(delegate_with_cache); // more delegates...etc.
-
const char *model_token
The unique null-terminated token string that acts as a ‘namespace’ for all serialization entries. Should be unique to a particular model (graph & constants). For an example of how to generate this from a TFLite model, see StrFingerprint() in lite/delegates/serialization.h.
Model Cache specific options. Only used when setting cache_dir, otherwise will be ignored.
-
TfLiteQnnDelegateSkipOption skip_options
Option to skip node by specifying node types or node ids.
-
TfLiteQnnDelegateGraphPriority graph_priority
Option to set graph priority.
-
TfLiteQnnDelegateBackendType backend_type
-
struct TfLiteQnnDelegateProfilingResult
- #include <QnnTFLiteDelegate.h>
Structure of profiling result.
Public Members
-
const uint8_t *buffer
Buffer of profiling result will be invalid once TfLiteQnnDelegateClearProfilingResult gets called
-
uint32_t buffer_length
Buffer length of profiling result in bytes.
-
const uint8_t *buffer