C Interface

Defines

QNN_DELEGATE_CAPI_EXPORT
QNN_DELEGATE_API_VERSION_MAJOR 0
QNN_DELEGATE_API_VERSION_MINOR 24
QNN_DELEGATE_API_VERSION_PATCH 0
QNN_DELEGATE_GPU_OPTION_INIT                  \
  {                                           \
    kGpuFp16,    /*precision*/                \
    kGpuDefault, /*performance_mode*/         \
    ""           /*kernel_repo_dir*/          \
  }


QNN_DELEGATE_HTP_OPTION_INIT                              \
  {                                                       \
    kHtpDefault,              /*performance_mode*/        \
    kHtpPerfCtrlManual,       /*perf_ctrl_strategy*/      \
    kHtpFp16,                 /*precision*/               \
    kHtpUnsignedPd,           /*pd_session*/              \
    kHtpOptimizeForInference, /*optimization_strategy*/   \
    true,                     /*useConvHmx*/              \
    false,                    /*useFoldRelu*/             \
    0,                        /*vtcm_size*/               \
    0,                        /*num_hvx_threads*/         \
    0,                        /*device_id*/               \
  }


QNN_DELEGATE_DSP_OPTION_INIT                              \
  {                                                       \
    kDspDefault,              /*performance_mode*/        \
    kDspPerfCtrlManual,       /*perf_ctrl_strategy*/      \
    kDspUnsignedPd,           /*pd_session*/              \
    kDspStatic,               /*encoding*/                \
  }


QNN_DELEGATE_IR_OPTION_INIT                            \
  {                                                    \
    nullptr,              /*output_path*/              \
  }


QNN_DELEGATE_OP_PACKAGE_OPTION_INIT           \
  {                                           \
    0,              /*num_op_package_infos*/  \
    nullptr,        /*op_package_infos*/      \
  }


QNN_DELEGATE_SKIP_OPTION_INIT                  \
  {                                            \
    nullptr,     /*skip_delegate_ops*/         \
    0,           /*skip_delegate_ops_nr*/      \
    nullptr,     /*skip_delegate_node_ids*/    \
    0,           /*skip_delegate_node_ids_nr*/ \
  }


QNN_DELEGATE_OPTION_INIT                                               \
  {                                                                    \
    kUndefinedBackend,                    /*backend_type*/             \
    "",                                   /*library_path*/             \
    "",                                   /*skel_library_dir*/         \
    QNN_DELEGATE_GPU_OPTION_INIT,         /*gpu_options*/              \
    QNN_DELEGATE_HTP_OPTION_INIT,         /*htp_options*/              \
    QNN_DELEGATE_DSP_OPTION_INIT,         /*dsp_options*/              \
    QNN_DELEGATE_IR_OPTION_INIT,          /*ir_options*/               \
    kLogOff,                              /*log_level*/                \
    kProfilingOff,                        /*profiling*/                \
    QNN_DELEGATE_OP_PACKAGE_OPTION_INIT,  /*op_package_options*/       \
    "",                                   /*tensor_dump_output_path*/  \
    "",                                   /*cache_dir*/                \
    "",                                   /*model_token*/              \
    QNN_DELEGATE_SKIP_OPTION_INIT,        /*skip_options*/             \
    kQnnPriorityDefault,                  /*graph_priority*/           \
  }


TfLiteQnnDelegateCapabilitySupported 1

Returned by TfLiteQnnDelegateHasCapability() if the capability is supported.

TfLiteQnnDelegateCapabilityNotSupported 0

Returned by TfLiteQnnDelegateHasCapability() if the capability is not supported.

Typedefs

typedef enum TfLiteQnnDelegateBackendType TfLiteQnnDelegateBackendType

The QNN backend used to delegate the model’s nodes. Each backend has its own set of supported ops and tensor types.

typedef enum TfLiteQnnDelegateLogLevel TfLiteQnnDelegateLogLevel

Logging level of the delegate and QNN backend.

typedef enum TfLiteQnnDelegateGraphPriority TfLiteQnnDelegateGraphPriority

Options to set Graph Priority. This is directly mapped to Qnn_Priority_t. Please refer to QNN SDK for additional information.

typedef enum TfLiteQnnDelegateProfilingOptions TfLiteQnnDelegateProfilingOptions

Options to profile the QNN Delegate execution.

typedef enum TfLiteQnnDelegateGpuPrecision TfLiteQnnDelegateGpuPrecision

Defines the optimization levels of the graph tensors that are not input nor output tensors. This enum controls the trade-off between performance and accuracy.

typedef enum TfLiteQnnDelegateGpuPerformanceMode TfLiteQnnDelegateGpuPerformanceMode

Defines performance modes available for GPU backend.

typedef enum TfLiteQnnDelegateHtpPerformanceMode TfLiteQnnDelegateHtpPerformanceMode

Defines performance modes available for HTP backend.

typedef enum TfLiteQnnDelegateDspPerformanceMode TfLiteQnnDelegateDspPerformanceMode

Defines performance modes available for DSP backend.

typedef enum TfLiteQnnDelegateHtpPerfCtrlStrategy TfLiteQnnDelegateHtpPerfCtrlStrategy

Defines performance control strategy

Manual: The performance mode is voted as the backend is initialized, and released at the moment of the backend is destroyed.

Users can control the vote/release of the performance mode by TfLiteQnnDelegateSetPerf().

Note that this is the default strategy.

For example, users can vote before inference starts, and release after all invocations are complete.

TfLiteQnnDelegateSetPerf(delegate, kPerformanceVote);
// invoke inferences...
TfLiteQnnDelegateSetPerf(delegate, kPerformanceRelease);

AUTO: QNN Delegate votes before starting inference, and releases after an idle interval.

typedef enum TfLiteQnnDelegateDspPerfCtrlStrategy TfLiteQnnDelegateDspPerfCtrlStrategy

Defines DSP performance control strategy. Similar to HTP cases.

typedef enum TfLiteQnnDelegateDspPdSession TfLiteQnnDelegateDspPdSession

Defines pd sessions available for DSP backend.

typedef enum TfLiteQnnDelegateDspEncoding TfLiteQnnDelegateDspEncoding

Defines encoding for DSP backend. Dynamic encoding is more precise but sacrifices a bit of performance.

typedef enum TfLiteQnnDelegateHtpPdSession TfLiteQnnDelegateHtpPdSession

Defines pd sessions available for HTP backend.

typedef enum TfLiteQnnDelegateHtpPrecision TfLiteQnnDelegateHtpPrecision

Defines the optimization levels of the graph tensors that are not input nor output tensors. This enum controls the trade-off between performance and accuracy.

typedef enum TfLiteQnnDelegateHtpOptimizationStrategy TfLiteQnnDelegateHtpOptimizationStrategy

Defines the optimization strategy used by the HTP backend. kHtpOptimizeForInference will have longer preparation time, but more optimal graph. kHtpOptimizeForPrepare will have shorter preparation time, but less optimal graph. kHtpOptimizeForInferenceO3 will take into account QNN_HTP_DEVICE_CONFIG_OPTION_SOC configuration when possible. When SOC information is taken into account, O3 configuration is expected to provide more optimal graph in most cases, but may result in less optimal graph in some cases. Please check HTP section in Qnn docs for more detail.

typedef enum TfLiteQnnDelegatePerformanceAction TfLiteQnnDelegatePerformanceAction

Defines the performance action used by TfLiteQnnDelegateSetPerf()

typedef int32_t TfLiteQnnDelegateCapabilityStatus
typedef enum TfLiteQnnDelegateCapability TfLiteQnnDelegateCapability

Defines possible QNN Delegate capabilities.

Enums

enum TfLiteQnnDelegateBackendType

The QNN backend used to delegate the model’s nodes. Each backend has its own set of supported ops and tensor types.

Values:

enumerator kUndefinedBackend = 0
enumerator kGpuBackend

Backend for AdrenoTM GPU hardware accelerator.

enumerator kHtpBackend

Backend for Hexagon HTP hardware accelerator.

enumerator kDspBackend

Backend for Hexagon DSP hardware accelerator.

enumerator kIrBackend

Backend for serializing model into dlc.

enum TfLiteQnnDelegateLogLevel

Logging level of the delegate and QNN backend.

Values:

enumerator kLogOff = 0

Disable delegate and QNN backend logging messages.

enumerator kLogLevelError = 1
enumerator kLogLevelWarn = 2
enumerator kLogLevelInfo = 3
enumerator kLogLevelVerbose = 4
enumerator kLogLevelDebug = 5
enum TfLiteQnnDelegateGraphPriority

Options to set Graph Priority. This is directly mapped to Qnn_Priority_t. Please refer to QNN SDK for additional information.

Values:

enumerator kQnnPriorityDefault = 0
enumerator kQnnPriorityLow
enumerator kQnnPriorityNormal
enumerator kQnnPriorityNormalHigh
enumerator kQnnPriorityHigh
enumerator kQnnPriorityUndefined
enum TfLiteQnnDelegateProfilingOptions

Options to profile the QNN Delegate execution.

Values:

enumerator kProfilingOff = 0
enumerator kBasicProfiling
enumerator kPerOpProfiling
enumerator kLintingProfiling
enum TfLiteQnnDelegateGpuPrecision

Defines the optimization levels of the graph tensors that are not input nor output tensors. This enum controls the trade-off between performance and accuracy.

Values:

enumerator kGpuUserProvided = 0
enumerator kGpuFp32
enumerator kGpuFp16
enumerator kGpuHybrid
enum TfLiteQnnDelegateGpuPerformanceMode

Defines performance modes available for GPU backend.

Values:

enumerator kGpuDefault = 0
enumerator kGpuHigh
enumerator kGpuNormal
enumerator kGpuLow
enum TfLiteQnnDelegateHtpPerformanceMode

Defines performance modes available for HTP backend.

Values:

enumerator kHtpDefault = 0
enumerator kHtpSustainedHighPerformance = 1
enumerator kHtpBurst = 2
enumerator kHtpHighPerformance = 3
enumerator kHtpPowerSaver = 4
enumerator kHtpLowPowerSaver = 5
enumerator kHtpHighPowerSaver = 6
enumerator kHtpLowBalanced = 7
enumerator kHtpBalanced = 8
enumerator kHtpExtremePowerSaver = 9
enum TfLiteQnnDelegateDspPerformanceMode

Defines performance modes available for DSP backend.

Values:

enumerator kDspDefault = 0
enumerator kDspSustainedHighPerformance = 1
enumerator kDspBurst = 2
enumerator kDspHighPerformance = 3
enumerator kDspPowerSaver = 4
enumerator kDspLowPowerSaver = 5
enumerator kDspHighPowerSaver = 6
enumerator kDspLowBalanced = 7
enumerator kDspBalanced = 8
enum TfLiteQnnDelegateHtpPerfCtrlStrategy

Defines performance control strategy

Manual: The performance mode is voted as the backend is initialized, and released at the moment of the backend is destroyed.

Users can control the vote/release of the performance mode by TfLiteQnnDelegateSetPerf().

Note that this is the default strategy.

For example, users can vote before inference starts, and release after all invocations are complete.

TfLiteQnnDelegateSetPerf(delegate, kPerformanceVote);
// invoke inferences...
TfLiteQnnDelegateSetPerf(delegate, kPerformanceRelease);

AUTO: QNN Delegate votes before starting inference, and releases after an idle interval.

Values:

enumerator kHtpPerfCtrlManual = 0
enumerator kHtpPerfCtrlAuto = 1
enum TfLiteQnnDelegateDspPerfCtrlStrategy

Defines DSP performance control strategy. Similar to HTP cases.

Values:

enumerator kDspPerfCtrlManual = 0
enumerator kDspPerfCtrlAuto = 1
enum TfLiteQnnDelegateDspPdSession

Defines pd sessions available for DSP backend.

Values:

enumerator kDspUnsignedPd = 0
enumerator kDspSignedPd
enumerator kDspAdaptivePd
enum TfLiteQnnDelegateDspEncoding

Defines encoding for DSP backend. Dynamic encoding is more precise but sacrifices a bit of performance.

Values:

enumerator kDspStatic = 0
enumerator kDspDynamic = 1
enumerator kDspUnknown = 0x7fffffff
enum TfLiteQnnDelegateHtpPdSession

Defines pd sessions available for HTP backend.

Values:

enumerator kHtpUnsignedPd = 0
enumerator kHtpSignedPd
enum TfLiteQnnDelegateHtpPrecision

Defines the optimization levels of the graph tensors that are not input nor output tensors. This enum controls the trade-off between performance and accuracy.

Values:

enumerator kHtpQuantized = 0
enumerator kHtpFp16
enum TfLiteQnnDelegateHtpOptimizationStrategy

Defines the optimization strategy used by the HTP backend. kHtpOptimizeForInference will have longer preparation time, but more optimal graph. kHtpOptimizeForPrepare will have shorter preparation time, but less optimal graph. kHtpOptimizeForInferenceO3 will take into account QNN_HTP_DEVICE_CONFIG_OPTION_SOC configuration when possible. When SOC information is taken into account, O3 configuration is expected to provide more optimal graph in most cases, but may result in less optimal graph in some cases. Please check HTP section in Qnn docs for more detail.

Values:

enumerator kHtpOptimizeForInference = 0
enumerator kHtpOptimizeForPrepare
enumerator kHtpOptimizeForInferenceO3
enum TfLiteQnnDelegatePerformanceAction

Defines the performance action used by TfLiteQnnDelegateSetPerf()

Values:

enumerator kPerformanceVote = 0
enumerator kPerformanceRelease = 1
enum TfLiteQnnDelegateCapability

Defines possible QNN Delegate capabilities.

Values:

enumerator kCapHtpRuntimeQuant = 0
enumerator kCapHtpRuntimeFp16 = 1
enumerator kCapGpuRuntime = 2
enumerator kCapDspRuntime = 3

Functions

TfLiteQnnDelegateOptions TfLiteQnnDelegateOptionsDefault()

Create the QNN Delegate options structure and populate with default values.

TfLiteDelegate *TfLiteQnnDelegateCreate(const TfLiteQnnDelegateOptions *options)

Create the QNN Delegate with the specified options.

void TfLiteQnnDelegateDelete(TfLiteDelegate *delegate)

Delete the QNN Delegate once no longer required.

Note that this is not a thread-safe function, which might cause unexpected behaviour when using it with TfLiteQnnDelegateSetPerf, TfLiteQnnDelegateUpdateHtpPerfMode, TfLiteQnnDelegateUpdateDspPerfMode, or TfLiteQnnDelegateDelete at the same time.

bool TfLiteQnnDelegateSetPerf(TfLiteDelegate *delegate, const TfLiteQnnDelegatePerformanceAction action)

Manually vote or release the performance mode. “Vote” to request that the hardware obey the performance mode setting as soon as possible. “Release” to release the vote. Note that this API only works for the HTP/DSP backend with kHtpPerfCtrlManual or kDspPerfCtrlManual. Returns true for success, false for failure.

TfLiteQnnDelegateCapabilityStatus TfLiteQnnDelegateHasCapability(const TfLiteQnnDelegateCapability cap)

Detect whether the capability is supported on the platform running QNN Delegate.

Note that this is an experimental feature.

bool TfLiteQnnDelegateUpdateHtpPerfMode(TfLiteDelegate *delegate, const TfLiteQnnDelegateHtpPerformanceMode mode)

This API changes the performance mode of a created QNN Delegate on HTP backend, returning true for the mode set correctly, false for any failure.

It will perform a vote after a successful update. If the strategy of performance controlling is manual, the new mode takes effect before this API returns.

Note that this API cannot be called during graph invocation, and this is an experimental feature.

bool TfLiteQnnDelegateUpdateDspPerfMode(TfLiteDelegate *delegate, const TfLiteQnnDelegateDspPerformanceMode mode)

This API changes the performance mode of a created QNN Delegate on DSP backend, returning true for the mode set correctly, false for any failure.

QnnDelegateApiVersion TfLiteQnnDelegateGetApiVersion()

Get QNN Delegate API version.

void *TfLiteQnnDelegateAllocCustomMem(size_t bytes, size_t alignment)

Allocate specific tensors (usually graph inputs and outputs) on shared memory. Users are responsible to allocate “enough” tensor bytes, and set alignment as kDefaultTensorAlignment. The function returns a valid pointer if allocation is successful.

Note that this is an experimental feature.

void TfLiteQnnDelegateFreeCustomMem(void *buffer_ptr)

Free the allocated shared memory.

Note that this is an experimental feature.

TfLiteQnnDelegateProfilingResult TfLiteQnnDelegateGetProfilingResult(TfLiteDelegate *delegate)

Get profiling result.

void TfLiteQnnDelegateClearProfilingResult(TfLiteDelegate *delegate)

Free the recorded profiling result.

struct QnnDelegateApiVersion
#include <QnnTFLiteDelegate.h>

A struct which is used to provide a version number using 3 values: major, minor, patch

Public Members

uint32_t major
uint32_t minor
uint32_t patch
struct TfLiteQnnDelegateGpuBackendOptions
#include <QnnTFLiteDelegate.h>

Specifies the backend options for the GPU backend. To be used when selecting TfLiteQnnDelegateBackendType::kGpuBackend for the TfLiteQnnDelegateOptions::backend_type.

Public Members

TfLiteQnnDelegateGpuPrecision precision

The default precision is half float for the best performance.

TfLiteQnnDelegateGpuPerformanceMode performance_mode

The default performance mode sets high.

const char *kernel_repo_dir

The QNN GPU backend supports on-disk kernel persistence strategies where compiled GPU kernel binaries are cached to disk and can be shared across models having the same kernels and improve warm init times significantly.

struct TfLiteQnnDelegateHtpBackendOptions
#include <QnnTFLiteDelegate.h>

Specifies the backend options for the HTP backend. To be used when selecting TfLiteQnnDelegateBackendType::kHtpBackend for the TfLiteQnnDelegateOptions::backend_type.

Public Members

TfLiteQnnDelegateHtpPerformanceMode performance_mode

The default performance mode sets no configurations on the HTP.

TfLiteQnnDelegateHtpPerfCtrlStrategy perf_ctrl_strategy

The default performance control strategy is Manual.

TfLiteQnnDelegateHtpPrecision precision

The default precision mode supports quantized networks. Other precision modes may only be supported on certain SoCs.

TfLiteQnnDelegateHtpPdSession pd_session

Signed or unsigned HTP PD session. The default PD session is unsigned.

TfLiteQnnDelegateHtpOptimizationStrategy optimization_strategy

The default optimization strategy will optimize the graph for inference.

bool useConvHmx

When using short conv hmx, one might have better performance, but convolution that have short depth and/or weights that are not symmetric could exhibit inaccurate results.

bool useFoldRelu

When using fold relu, one might have better performance. This optimization is correct when quantization ranges for convolution are equal to or are subset of the Relu operation.

uint32_t vtcm_size

Option to set VTCM size in MB. This is directly mapped to QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE under QnnHtpGraph_ConfigOption_t. If VTCM size is set to 0, the default VTCM size will be used. If VTCM size is greater than VTCM size available for this device, it will be set to the maximum VTCM size for this device.

uint32_t num_hvx_threads

Option to set the number of HVX threads. This is directly mapped to QNN_HTP_GRAPH_CONFIG_OPTION_NUM_HVX_THREADS under QnnHtpGraph_ConfigOption_t. If this option is set to 0, the default number of HVX threads will be used. If the input exceeds the maximum number of HVX threads, the maximum number of threads supported will be used.

uint32_t device_id

Some SoCs come with more than 1 HTP device. You can set which HTP device you want to run the model on by this attribute. But in most cases, you can just use the default device_id.

struct TfLiteQnnDelegateDspBackendOptions
#include <QnnTFLiteDelegate.h>

Specifies the backend options for the DSP backend. To be used when selecting kDspBackend as the <backend_type>.

Public Members

TfLiteQnnDelegateDspPerformanceMode performance_mode

The default performance mode sets no configurations on the DSP.

TfLiteQnnDelegateDspPerfCtrlStrategy perf_ctrl_strategy

The default performance control strategy is Manual.

TfLiteQnnDelegateDspPdSession pd_session

The default PD session is unsigned.

TfLiteQnnDelegateDspEncoding encoding

The default Encoding is static.

struct TfLiteQnnDelegateIrBackendOptions
#include <QnnTFLiteDelegate.h>

Specifies the backend options for the IR writer backend. To be used when selecting TfLiteQnnDelegateBackendType::kIrBackend for the TfLiteQnnDelegateOptions::backend_type.

Public Members

const char *output_path
struct TfLiteQnnDelegateOpPackageOpMap
#include <QnnTFLiteDelegate.h>

Map of TFLite custom operator name to op type defined within an op package.

Public Members

const char *custom_op_name

The TfLiteRegistration::custom_name set during registration.

const char *qnn_op_type_name

The corresponding op type name defined in the op package.

struct TfLiteQnnDelegateOpPackageInfo
#include <QnnTFLiteDelegate.h>

Structure containing the information needed to register and use an op package with QNN.

Public Members

const char *op_package_name

The name of the op package.

const char *op_package_path

The path on disk to the op package library.

const char *interface_provider

The name of a function in the op package library which satisfies the QnnOpPackage_InterfaceProvider_t interface.

const char *target

The target which this op package library was compiled for.

int num_ops_map

Number of elements in the TfLiteQnnDelegateOpPackageInfo.ops_map array.

TfLiteQnnDelegateOpPackageOpMap *ops_map

An array of TfLiteQnnDelegateOpPackageOpMap structures.

struct TfLiteQnnDelegateOpPackageOptions
#include <QnnTFLiteDelegate.h>

Public Members

int num_op_package_infos

Number of elements in TfLiteQnnDelegateOpPackageOptions.op_package_infos array.

TfLiteQnnDelegateOpPackageInfo *op_package_infos

An array of TfLiteQnnDelegateOpPackageInfo structures.

struct TfLiteQnnDelegateSkipOption
#include <QnnTFLiteDelegate.h>

Public Members

const int *skip_delegate_ops

Set ops not to be delegated manually based on the op id(s). To obtain all the op ids, please refer to tensorflow/lite/builtin_ops.h. Notice that all ops with the types specified in the skip_delegate_ops array are skipped. For example, if you set skip to include SquaredDifference, all instances of SquaredDifference ops in the model will not be delegated.

uint32_t skip_delegate_ops_nr

Indicates the length of skip_delegate_ops array.

const int *skip_delegate_node_ids

Set node IDs not to be delegated. Node id can be obtained by node’s location information in .tflite.

uint32_t skip_delegate_node_ids_nr

Indicates the length of skip_delegate_node_ids array.

struct TfLiteQnnDelegateOptions
#include <QnnTFLiteDelegate.h>

Public Members

TfLiteQnnDelegateBackendType backend_type

The backend QNN library to open and execute the graph with. This is a required argument and will error out if kUndefinedBackend is supplied.

const char *library_path

Optional parameter to override the QNN backend library.

const char *skel_library_dir

Optional parameter specifying the directory of QNN Skel library. Only useful for backends which have a Skel library.

TfLiteQnnDelegateGpuBackendOptions gpu_options

Optional backend specific options for the GPU backend. Only used when selecting TfLiteQnnDelegateBackendType::kGpuBackend, otherwise will be ignored.

TfLiteQnnDelegateHtpBackendOptions htp_options

Optional backend specific options for the HTP backend. Only used when selecting TfLiteQnnDelegateBackendType::kHtpBackend, otherwise will be ignored.

TfLiteQnnDelegateDspBackendOptions dsp_options

Optional backend specific options for the DSP backend. Only used when selecting TfLiteQnnDelegateBackendType::kDspBackend, otherwise will be ignored.

TfLiteQnnDelegateIrBackendOptions ir_options

Optional backend specific options for the IR backend. Only used when selecting TfLiteQnnDelegateBackendType::kIrBackend, otherwise will be ignored.

TfLiteQnnDelegateLogLevel log_level

Logging level of the delegate and the backend. Default is off.

TfLiteQnnDelegateProfilingOptions profiling

Option to enable profiling with the delegate. Default is off.

TfLiteQnnDelegateOpPackageOptions op_package_options

Optional structure to specify op packages loaded and used by the backend.

const char *tensor_dump_output_path

Tensor dump output path. If a path is given, Delegate will write outputs of each OP there. We don’t recommend using this option. It exists only for debugging accuracy issues.

const char *cache_dir

Specifies the directory of a compiled model. Signals intent to either:

  • Save the model if the file doesn’t exist, or

  • Restore model from the file.

Model Cache specific options. Only used when setting model_token, otherwise will be ignored.

We don’t recommend that delegate instances with/without cache be mixed in same process, unless an instance without cache is initialized, invoked, and terminated before an instance with cache is used in order to make sure all resources are prepared correctly.

TfLiteDelegate* delegate_wo_cache =
TfLiteQnnDelegateCreate(&options_wo_cache);
interpreter_0->ModifyGraphWithDelegate(delegate_wo_cache);

// Perform inference with interpreter_0

TfLiteQnnDelegateDelete(delegate_wo_cache);

// after this, another delegate_with_cache can be used in the same
// process, though not recommended at this moment.
TfLiteDelegate* delegate_with_cache =
TfLiteQnnDelegateCreate(&options_with_cache);

// another interpreter
interpreter_1->ModifyGraphWithDelegate(delegate_with_cache);

// more delegates...etc.

const char *model_token

The unique null-terminated token string that acts as a ‘namespace’ for all serialization entries. Should be unique to a particular model (graph & constants). For an example of how to generate this from a TFLite model, see StrFingerprint() in lite/delegates/serialization.h.

Model Cache specific options. Only used when setting cache_dir, otherwise will be ignored.

TfLiteQnnDelegateSkipOption skip_options

Option to skip node by specifying node types or node ids.

TfLiteQnnDelegateGraphPriority graph_priority

Option to set graph priority.

struct TfLiteQnnDelegateProfilingResult
#include <QnnTFLiteDelegate.h>

Structure of profiling result.

Public Members

const uint8_t *buffer

Buffer of profiling result will be invalid once TfLiteQnnDelegateClearProfilingResult gets called.

uint32_t buffer_length

Buffer length of profiling result in bytes.