Program Listing for File QnnTFLiteDelegate.h

Return to documentation for file (include/QNN/QnnTFLiteDelegate.h)

//==============================================================================
//
//  Copyright (c) Qualcomm Technologies, Inc.
//  All Rights Reserved.
//  Confidential and Proprietary - Qualcomm Technologies, Inc.
//
//==============================================================================
#ifndef TENSORFLOW_LITE_DELEGATES_QNN_QNN_TFLITE_DELEGATE_H_
#define TENSORFLOW_LITE_DELEGATES_QNN_QNN_TFLITE_DELEGATE_H_

#include <stddef.h>

#include "tensorflow/lite/c/common.h"

// Annotation prefixed to every function declared in this header. It expands
// to nothing unless it has already been defined before this point.
// NOTE(review): presumably the build defines it to mark exported symbols —
// confirm against the build configuration.
#ifndef QNN_DELEGATE_CAPI_EXPORT
#define QNN_DELEGATE_CAPI_EXPORT
#endif /* QNN_DELEGATE_CAPI_EXPORT */

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// Components (major.minor.patch) of the QNN Delegate API version described by
// this header.
// NOLINTBEGIN(cppcoreguidelines-macro-usage)
#define QNN_DELEGATE_API_VERSION_MAJOR 0
#define QNN_DELEGATE_API_VERSION_MINOR 24
#define QNN_DELEGATE_API_VERSION_PATCH 0
// NOLINTEND(cppcoreguidelines-macro-usage)

/// A version number expressed as three values: major, minor and patch.
/// This is the type returned by TfLiteQnnDelegateGetApiVersion().
typedef struct {  // NOLINT(modernize-use-using)
  /// Major component of the version.
  uint32_t major;
  /// Minor component of the version.
  uint32_t minor;
  /// Patch component of the version.
  uint32_t patch;
} QnnDelegateApiVersion;

/// Selects the QNN backend that the model's nodes are delegated to. The set of
/// supported ops and tensor types differs from one backend to another.
typedef enum TfLiteQnnDelegateBackendType {  // NOLINT(modernize-use-using)
  /// No backend has been selected.
  kUndefinedBackend = 0,
  /// Backend for Adreno<sup>TM</sup> GPU hardware accelerator.
  kGpuBackend = 1,
  /// Backend for Hexagon HTP hardware accelerator.
  kHtpBackend = 2,
  /// Backend for Hexagon DSP hardware accelerator.
  kDspBackend = 3,
  /// Backend for serializing model into dlc
  kIrBackend = 4,
} TfLiteQnnDelegateBackendType;

/// Logging level of the delegate and QNN backend.
///
/// Every enumerator has an explicitly pinned value. Note that
/// kLogLevelVerbose (4) is numbered below kLogLevelDebug (5).
typedef enum TfLiteQnnDelegateLogLevel {  // NOLINT(modernize-use-using)
  /// Disable delegate and QNN backend logging messages.
  kLogOff = 0,
  /// Error log level.
  kLogLevelError = 1,
  /// Warning log level.
  kLogLevelWarn = 2,
  /// Info log level.
  kLogLevelInfo = 3,
  /// Verbose log level.
  kLogLevelVerbose = 4,
  /// Debug log level.
  kLogLevelDebug = 5,
} TfLiteQnnDelegateLogLevel;

/// Graph priority choices. Each value maps directly onto Qnn_Priority_t; see
/// the QNN SDK documentation for the meaning of the individual levels.
typedef enum TfLiteQnnDelegateGraphPriority {  // NOLINT(modernize-use-using)
  kQnnPriorityDefault = 0,
  kQnnPriorityLow = 1,
  kQnnPriorityNormal = 2,
  kQnnPriorityNormalHigh = 3,
  kQnnPriorityHigh = 4,
  kQnnPriorityUndefined = 5,
} TfLiteQnnDelegateGraphPriority;

/// Profiling modes available for QNN Delegate execution.
typedef enum TfLiteQnnDelegateProfilingOptions {  // NOLINT(modernize-use-using)
  /// Profiling is disabled.
  kProfilingOff = 0,
  kBasicProfiling = 1,
  kPerOpProfiling = 2,
  kLintingProfiling = 3,
} TfLiteQnnDelegateProfilingOptions;

/// Optimization level applied by the GPU backend to graph tensors that are
/// neither inputs nor outputs. The selection trades performance against
/// accuracy.
typedef enum TfLiteQnnDelegateGpuPrecision {  // NOLINT(modernize-use-using)
  kGpuUserProvided = 0,
  kGpuFp32 = 1,
  kGpuFp16 = 2,
  kGpuHybrid = 3,
} TfLiteQnnDelegateGpuPrecision;

/// Performance modes that can be requested from the GPU backend.
typedef enum TfLiteQnnDelegateGpuPerformanceMode {  // NOLINT(modernize-use-using)
  kGpuDefault = 0,
  kGpuHigh = 1,
  kGpuNormal = 2,
  kGpuLow = 3,
} TfLiteQnnDelegateGpuPerformanceMode;

/// Defines performance modes available for HTP backend.
///
/// Every enumerator has an explicitly pinned value. Values 0 through 8 line up
/// with the same-named entries of TfLiteQnnDelegateDspPerformanceMode;
/// kHtpExtremePowerSaver (9) has no DSP counterpart.
typedef enum TfLiteQnnDelegateHtpPerformanceMode {  // NOLINT(modernize-use-using)
  kHtpDefault = 0,
  kHtpSustainedHighPerformance = 1,
  kHtpBurst = 2,
  kHtpHighPerformance = 3,
  kHtpPowerSaver = 4,
  kHtpLowPowerSaver = 5,
  kHtpHighPowerSaver = 6,
  kHtpLowBalanced = 7,
  kHtpBalanced = 8,
  kHtpExtremePowerSaver = 9,
} TfLiteQnnDelegateHtpPerformanceMode;

/// Defines performance modes available for DSP backend.
///
/// Every enumerator has an explicitly pinned value, using the same numbering
/// as the same-named entries of TfLiteQnnDelegateHtpPerformanceMode.
typedef enum TfLiteQnnDelegateDspPerformanceMode {  // NOLINT(modernize-use-using)
  kDspDefault = 0,
  kDspSustainedHighPerformance = 1,
  kDspBurst = 2,
  kDspHighPerformance = 3,
  kDspPowerSaver = 4,
  kDspLowPowerSaver = 5,
  kDspHighPowerSaver = 6,
  kDspLowBalanced = 7,
  kDspBalanced = 8,
} TfLiteQnnDelegateDspPerformanceMode;

///   Defines the performance control strategy.
///
///   **Manual**: The performance mode is voted when the backend is
///   initialized, and released when the backend is destroyed.
///
///   Users can control the vote/release of the performance mode by
///   TfLiteQnnDelegateSetPerf().
///
///   Note that this is the default strategy.
///
///   For example, users can vote before inference starts, and release after all
///   invocations are complete.
///
///   ~~~~~~~~~~~~~{.cpp}
///      TfLiteQnnDelegateSetPerf(delegate, kPerformanceVote);
///      // invoke inferences...
///      TfLiteQnnDelegateSetPerf(delegate, kPerformanceRelease);
///   ~~~~~~~~~~~~~
///
///   **Auto**: QNN Delegate votes before starting inference, and releases after
///   an idle interval.
typedef enum TfLiteQnnDelegateHtpPerfCtrlStrategy {  // NOLINT(modernize-use-using)
  /// Manual strategy (the default); see the description above.
  kHtpPerfCtrlManual = 0,
  /// Auto strategy; see the description above.
  kHtpPerfCtrlAuto = 1,
} TfLiteQnnDelegateHtpPerfCtrlStrategy;

/// Defines the DSP performance control strategy. It is similar to the HTP
/// case; see TfLiteQnnDelegateHtpPerfCtrlStrategy for the description of the
/// manual and auto strategies.
typedef enum TfLiteQnnDelegateDspPerfCtrlStrategy {  // NOLINT(modernize-use-using)
  kDspPerfCtrlManual = 0,
  kDspPerfCtrlAuto = 1,
} TfLiteQnnDelegateDspPerfCtrlStrategy;

/// PD session kinds that can be requested from the DSP backend.
typedef enum TfLiteQnnDelegateDspPdSession {  // NOLINT(modernize-use-using)
  kDspUnsignedPd = 0,
  kDspSignedPd = 1,
  kDspAdaptivePd = 2,
} TfLiteQnnDelegateDspPdSession;

/// Defines encoding for DSP backend. Dynamic encoding is more precise but
/// sacrifices a bit of performance.
typedef enum TfLiteQnnDelegateDspEncoding {  // NOLINT(modernize-use-using)
  /// Static encoding.
  kDspStatic = 0,
  /// Dynamic encoding.
  kDspDynamic = 1,
  /// Unknown encoding, pinned to the largest positive 32-bit signed value.
  kDspUnknown = 0x7fffffff,
} TfLiteQnnDelegateDspEncoding;

/// PD session kinds that can be requested from the HTP backend.
typedef enum TfLiteQnnDelegateHtpPdSession {  // NOLINT(modernize-use-using)
  kHtpUnsignedPd = 0,
  kHtpSignedPd = 1,
} TfLiteQnnDelegateHtpPdSession;

/// Optimization level applied by the HTP backend to graph tensors that are
/// neither inputs nor outputs. The selection trades performance against
/// accuracy.
typedef enum TfLiteQnnDelegateHtpPrecision {  // NOLINT(modernize-use-using)
  kHtpQuantized = 0,
  kHtpFp16 = 1,
} TfLiteQnnDelegateHtpPrecision;

/// Optimization strategy applied by the HTP backend.
///
/// - \ref kHtpOptimizeForInference: longer preparation time, but a more
///   optimal graph.
/// - \ref kHtpOptimizeForPrepare: shorter preparation time, but a less optimal
///   graph.
/// - \ref kHtpOptimizeForInferenceO3: takes the
///   QNN_HTP_DEVICE_CONFIG_OPTION_SOC configuration into account when
///   possible. With SOC information available, the O3 configuration is
///   expected to give a more optimal graph in most cases, but may give a less
///   optimal graph in some cases.
///
/// Please check the HTP section in the Qnn docs for more detail.
typedef enum TfLiteQnnDelegateHtpOptimizationStrategy {  // NOLINT(modernize-use-using)
  kHtpOptimizeForInference = 0,
  kHtpOptimizeForPrepare = 1,
  kHtpOptimizeForInferenceO3 = 2,
} TfLiteQnnDelegateHtpOptimizationStrategy;

/// Defines the performance action passed to TfLiteQnnDelegateSetPerf().
typedef enum TfLiteQnnDelegatePerformanceAction {  // NOLINT(modernize-use-using)
  /// Vote for the performance mode.
  kPerformanceVote = 0,
  /// Release a previous vote.
  kPerformanceRelease = 1,
} TfLiteQnnDelegatePerformanceAction;

/// Specifies the backend options for the GPU backend. To be used when selecting
/// \ref TfLiteQnnDelegateBackendType.kGpuBackend for the \ref
/// TfLiteQnnDelegateOptions.backend_type.
typedef struct {  // NOLINT
  /// The default precision is half float for the best performance.
  TfLiteQnnDelegateGpuPrecision precision;
  /// The default performance mode sets high.
  /// NOTE(review): QNN_DELEGATE_GPU_OPTION_INIT initializes this field to
  /// kGpuDefault rather than kGpuHigh — confirm that kGpuDefault corresponds
  /// to the high mode described here.
  TfLiteQnnDelegateGpuPerformanceMode performance_mode;
  /// The QNN GPU backend supports on-disk kernel persistence strategies where
  /// compiled GPU kernel binaries are cached to disk and can be shared across
  /// models having the same kernels and improve warm init times significantly.
  const char* kernel_repo_dir;
} TfLiteQnnDelegateGpuBackendOptions;

/// Default initializer for TfLiteQnnDelegateGpuBackendOptions. The values are
/// positional and follow the order of the struct's fields.
// clang-format off
#define QNN_DELEGATE_GPU_OPTION_INIT   \
  {                                   \
    kGpuFp16,    /*precision*/        \
    kGpuDefault, /*performance_mode*/ \
    ""           /*kernel_repo_dir*/  \
  }
// clang-format on

/// Specifies the backend options for the HTP backend. To be used when selecting
/// \ref TfLiteQnnDelegateBackendType.kHtpBackend for the \ref
/// TfLiteQnnDelegateOptions.backend_type.
typedef struct {  // NOLINT
  /// The default performance mode sets no configurations on the HTP.
  TfLiteQnnDelegateHtpPerformanceMode performance_mode;
  /// The default performance control strategy is Manual.
  TfLiteQnnDelegateHtpPerfCtrlStrategy perf_ctrl_strategy;
  /// The default precision mode supports quantized networks. Other precision
  /// modes may only be supported on certain SoCs.
  TfLiteQnnDelegateHtpPrecision precision;
  /// Signed or unsigned HTP PD session. The default PD session is unsigned.
  TfLiteQnnDelegateHtpPdSession pd_session;
  /// The default optimization strategy will optimize the graph for inference.
  TfLiteQnnDelegateHtpOptimizationStrategy optimization_strategy;
  /// When using short conv hmx, one might have better performance,
  /// but convolutions that have short depth and/or weights that are not
  /// symmetric could exhibit inaccurate results.
  bool useConvHmx;
  /// When using fold relu, one might have better performance. This optimization
  /// is correct when quantization ranges for convolution are equal to or are
  /// a subset of the Relu operation.
  bool useFoldRelu;
  /// Option to set VTCM size in MB. This is directly mapped to
  /// QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE under QnnHtpGraph_ConfigOption_t. If
  /// VTCM size is set to 0, the default VTCM size will be used.
  /// If VTCM size is greater than VTCM size available for this device,
  /// it will be set to the maximum VTCM size for this device.
  uint32_t vtcm_size;
  /// Option to set number of HVX threads. This is directly mapped to
  /// QNN_HTP_GRAPH_CONFIG_OPTION_NUM_HVX_THREADS under
  /// QnnHtpGraph_ConfigOption_t. If this option is set to 0, the default
  /// number of HVX threads will be used. If input exceeds the max number of HVX
  /// threads, the maximum number of threads supported will be used.
  uint32_t num_hvx_threads;
  /// Some SoCs come with more than 1 HTP device. You can set which HTP device
  /// you want to run the model on by this attribute.
  /// But in most cases, you can just use the default device_id.
  uint32_t device_id;
} TfLiteQnnDelegateHtpBackendOptions;

/// Default initializer for TfLiteQnnDelegateHtpBackendOptions. The values are
/// positional and follow the order of the struct's fields.
///
/// The precision default is kHtpQuantized, in line with the documentation of
/// TfLiteQnnDelegateHtpBackendOptions.precision ("The default precision mode
/// supports quantized networks"); other precision modes may only be supported
/// on certain SoCs.
// clang-format off
#define QNN_DELEGATE_HTP_OPTION_INIT                      \
  {                                                       \
    kHtpDefault,              /*performance_mode*/        \
    kHtpPerfCtrlManual,       /*perf_ctrl_strategy*/      \
    kHtpQuantized,            /*precision*/               \
    kHtpUnsignedPd,           /*pd_session*/              \
    kHtpOptimizeForInference, /*optimization_strategy*/   \
    true,                     /*useConvHmx*/              \
    false,                    /*useFoldRelu*/             \
    0,                        /*vtcm_size*/               \
    0,                        /*num_hvx_threads*/         \
    0,                        /*device_id*/               \
  }
// clang-format on

/// Specifies the backend options for the DSP backend. To be used when selecting
/// \ref TfLiteQnnDelegateBackendType.kDspBackend for the \ref
/// TfLiteQnnDelegateOptions.backend_type.
typedef struct {  // NOLINT
  /// The default performance mode sets no configurations on the DSP.
  TfLiteQnnDelegateDspPerformanceMode performance_mode;
  /// The default performance control strategy is Manual.
  TfLiteQnnDelegateDspPerfCtrlStrategy perf_ctrl_strategy;
  /// The default PD session is unsigned.
  TfLiteQnnDelegateDspPdSession pd_session;
  /// The default encoding is static.
  TfLiteQnnDelegateDspEncoding encoding;
} TfLiteQnnDelegateDspBackendOptions;

/// Default initializer for TfLiteQnnDelegateDspBackendOptions. The values are
/// positional and follow the order of the struct's fields.
// clang-format off
#define QNN_DELEGATE_DSP_OPTION_INIT                      \
  {                                                       \
    kDspDefault,              /*performance_mode*/        \
    kDspPerfCtrlManual,       /*perf_ctrl_strategy*/      \
    kDspUnsignedPd,           /*pd_session*/              \
    kDspStatic,               /*encoding*/                \
  }
// clang-format on

/// Specifies the backend options for the IR writer backend. To be used when
/// selecting \ref TfLiteQnnDelegateBackendType.kIrBackend for the \ref
/// TfLiteQnnDelegateOptions.backend_type.
typedef struct {  // NOLINT
  /// Output path used by the IR writer backend.
  /// NOTE(review): presumably the location of the serialized dlc, and
  /// presumably a null-terminated string — confirm, together with the
  /// behaviour when this is left as a null pointer.
  const char* output_path;
} TfLiteQnnDelegateIrBackendOptions;

/// Default initializer for TfLiteQnnDelegateIrBackendOptions: no output path.
/// NULL is used instead of nullptr so that the macro is also usable from C
/// translation units; nullptr is not a C keyword before C23.
// clang-format off
#define QNN_DELEGATE_IR_OPTION_INIT                      \
  {                                                      \
    NULL,                 /*output_path*/                \
  }
// clang-format on

/// Maps one TFLite custom operator name to an op type defined within an op
/// package. Arrays of these entries are referenced from
/// TfLiteQnnDelegateOpPackageInfo.ops_map.
typedef struct {  // NOLINT
  /// The TfLiteRegistration::custom_name set during registration.
  const char* custom_op_name;
  /// The corresponding op type name defined in the op package.
  const char* qnn_op_type_name;
} TfLiteQnnDelegateOpPackageOpMap;

/// Default initializer for TfLiteQnnDelegateOpPackageOptions: zero op
/// packages and no info array.
/// NULL is used instead of nullptr so that the macro is also usable from C
/// translation units; nullptr is not a C keyword before C23.
// clang-format off
#define QNN_DELEGATE_OP_PACKAGE_OPTION_INIT   \
  {                                           \
    0,              /*num_op_package_infos*/  \
    NULL,           /*op_package_infos*/      \
  }
// clang-format on

/// Structure containing the information needed to register and use an op
/// package with QNN. Arrays of these entries are referenced from
/// TfLiteQnnDelegateOpPackageOptions.op_package_infos.
typedef struct {  // NOLINT
  /// The name of the op package.
  const char* op_package_name;
  /// The path on disk to the op package library.
  const char* op_package_path;
  /// The name of a function in the op package library which satisfies the
  /// QnnOpPackage_InterfaceProvider_t interface.
  const char* interface_provider;
  /// The target which this op package library was compiled for.
  const char* target;
  /// Number of elements in the TfLiteQnnDelegateOpPackageInfo.ops_map array.
  int num_ops_map;
  /// An array of TfLiteQnnDelegateOpPackageOpMap structures, holding
  /// num_ops_map elements.
  TfLiteQnnDelegateOpPackageOpMap* ops_map;
} TfLiteQnnDelegateOpPackageInfo;

/// Collection of op packages to be loaded and used by the backend. Set through
/// TfLiteQnnDelegateOptions.op_package_options.
typedef struct {  // NOLINT
  /// Number of elements in TfLiteQnnDelegateOpPackageOptions.op_package_infos
  /// array.
  int num_op_package_infos;
  /// An array of TfLiteQnnDelegateOpPackageInfo structures.
  TfLiteQnnDelegateOpPackageInfo* op_package_infos;
} TfLiteQnnDelegateOpPackageOptions;

/// Options to exclude nodes from delegation, either by op type or by node id.
/// Set through TfLiteQnnDelegateOptions.skip_options.
typedef struct {  // NOLINT
  /// Set ops not to be delegated manually based on the op id(s).
  /// To obtain all the op ids, please refer to tensorflow/lite/builtin_ops.h.
  /// Note that every op whose type is listed in the \ref skip_delegate_ops
  /// array is skipped. For example, if you set skip to include
  /// SquaredDifference, all instances of SquaredDifference ops in the
  /// model will not be delegated.
  const int* skip_delegate_ops;
  /// Indicates the length of \ref skip_delegate_ops array.
  uint32_t skip_delegate_ops_nr;
  /// Set node IDs not to be delegated.
  /// Node id can be obtained by node's location information in .tflite.
  const int* skip_delegate_node_ids;
  /// Indicates the length of \ref skip_delegate_node_ids array.
  uint32_t skip_delegate_node_ids_nr;
} TfLiteQnnDelegateSkipOption;

/// Default initializer for TfLiteQnnDelegateSkipOption: nothing is skipped
/// (both arrays are null with a length of zero).
/// NULL is used instead of nullptr so that the macro is also usable from C
/// translation units; nullptr is not a C keyword before C23.
// clang-format off
#define QNN_DELEGATE_SKIP_OPTION_INIT          \
  {                                            \
    NULL,        /*skip_delegate_ops*/         \
    0,           /*skip_delegate_ops_nr*/      \
    NULL,        /*skip_delegate_node_ids*/    \
    0,           /*skip_delegate_node_ids_nr*/ \
  }
// clang-format on

/// Top-level options used to create a QNN Delegate; passed to
/// TfLiteQnnDelegateCreate(). QNN_DELEGATE_OPTION_INIT provides a positional
/// default initializer, so its entries follow the order of the fields below.
typedef struct {  // NOLINT
  /// The backend QNN library to open and execute the graph with. This is a
  /// required argument and will error out if kUndefinedBackend is supplied.
  TfLiteQnnDelegateBackendType backend_type;

  /// Optional parameter to override the QNN backend library.
  const char* library_path;

  /// Optional parameter specifying the directory of QNN Skel library. Only
  /// useful for backends which have a Skel library.
  const char* skel_library_dir;

  /// Optional backend specific options for the GPU backend. Only used when
  /// selecting \ref TfLiteQnnDelegateBackendType.kGpuBackend, otherwise will be
  /// ignored.
  TfLiteQnnDelegateGpuBackendOptions gpu_options;

  /// Optional backend specific options for the HTP backend. Only used when
  /// selecting \ref TfLiteQnnDelegateBackendType.kHtpBackend, otherwise will be
  /// ignored.
  TfLiteQnnDelegateHtpBackendOptions htp_options;

  /// Optional backend specific options for the DSP backend. Only used when
  /// selecting \ref TfLiteQnnDelegateBackendType.kDspBackend, otherwise will be
  /// ignored.
  TfLiteQnnDelegateDspBackendOptions dsp_options;

  /// Optional backend specific options for the IR backend. Only used when
  /// selecting \ref TfLiteQnnDelegateBackendType.kIrBackend, otherwise will be
  /// ignored.
  TfLiteQnnDelegateIrBackendOptions ir_options;

  /// Logging level of the delegate and the backend. Default is off.
  TfLiteQnnDelegateLogLevel log_level;

  /// Option to enable profiling with the delegate. Default is off.
  TfLiteQnnDelegateProfilingOptions profiling;

  /// Optional structure to specify op packages loaded and used by the backend.
  TfLiteQnnDelegateOpPackageOptions op_package_options;

  /// Tensor dump output path. If a path is given, Delegate will write
  /// outputs of each OP there.
  /// We don't recommend using this option. It exists only for debugging
  /// accuracy issues.
  const char* tensor_dump_output_path;

  /// Specifies the directory of a compiled model. Signals intent to either:
  ///   * Save the model if the file doesn't exist, or
  ///   * Restore model from the file.
  ///
  /// Model Cache specific options. Only used when setting \ref model_token,
  /// otherwise will be ignored.
  ///
  /// We don't recommend that delegate instances with/without cache be mixed in
  /// same process, unless an instance <b>without</b> cache is initialized,
  /// invoked, and *terminated* before an instance with cache is used in order
  /// to make sure all resources are prepared correctly.
  ///
  ///   ~~~~~~~~~~~~~{.cpp}
  ///
  ///   TfLiteDelegate* delegate_wo_cache =
  ///   TfLiteQnnDelegateCreate(&options_wo_cache);
  ///   interpreter_0->ModifyGraphWithDelegate(delegate_wo_cache);
  ///
  ///   // Perform inference with interpreter_0
  ///
  ///   TfLiteQnnDelegateDelete(delegate_wo_cache);
  ///
  ///   // after this, another delegate_with_cache can be used in the same
  ///   // process, though not recommended at this moment.
  ///   TfLiteDelegate* delegate_with_cache =
  ///   TfLiteQnnDelegateCreate(&options_with_cache);
  ///
  ///   // another interpreter
  ///   interpreter_1->ModifyGraphWithDelegate(delegate_with_cache);
  ///
  ///   // more delegates...etc.
  ///   ~~~~~~~~~~~~~
  const char* cache_dir;
  /// The unique null-terminated token string that acts as a "namespace" for all
  /// serialization entries. Should be unique to a particular model (graph &
  /// constants). For an example of how to generate this from a TFLite model,
  /// see StrFingerprint() in lite/delegates/serialization.h.
  ///
  /// Model Cache specific options. Only used when setting \ref cache_dir,
  /// otherwise will be ignored.
  const char* model_token;
  /// Option to skip node by specifying node types or node ids.
  TfLiteQnnDelegateSkipOption skip_options;
  /// Option to set graph priority.
  TfLiteQnnDelegateGraphPriority graph_priority;
} TfLiteQnnDelegateOptions;

/// Default initializer for TfLiteQnnDelegateOptions. The values are positional
/// and follow the order of the struct's fields; nested option structs are
/// filled from their own *_OPTION_INIT macros. backend_type is left as
/// kUndefinedBackend, which TfLiteQnnDelegateOptions documents as an error, so
/// callers are expected to set it before creating a delegate.
// clang-format off
#define QNN_DELEGATE_OPTION_INIT                                        \
  {                                                                     \
    kUndefinedBackend,                    /*backend_type*/              \
    "",                                   /*library_path*/              \
    "",                                   /*skel_library_dir*/          \
    QNN_DELEGATE_GPU_OPTION_INIT,         /*gpu_options*/               \
    QNN_DELEGATE_HTP_OPTION_INIT,         /*htp_options*/               \
    QNN_DELEGATE_DSP_OPTION_INIT,         /*dsp_options*/               \
    QNN_DELEGATE_IR_OPTION_INIT,          /*ir_options*/                \
    kLogOff,                              /*log_level*/                 \
    kProfilingOff,                        /*profiling*/                 \
    QNN_DELEGATE_OP_PACKAGE_OPTION_INIT,  /*op_package_options*/        \
    "",                                   /*tensor_dump_output_path*/   \
    "",                                   /*cache_dir*/                 \
    "",                                   /*model_token*/               \
    QNN_DELEGATE_SKIP_OPTION_INIT,        /*skip_options*/              \
    kQnnPriorityDefault,                  /*graph_priority*/            \
  }
// clang-format on

/// Status type returned by TfLiteQnnDelegateHasCapability(). Its values are
/// TfLiteQnnDelegateCapabilitySupported and
/// TfLiteQnnDelegateCapabilityNotSupported.
typedef int32_t  // NOLINT(modernize-use-using)
    TfLiteQnnDelegateCapabilityStatus;

// NOLINTBEGIN(cppcoreguidelines-macro-usage)
/// Returned by TfLiteQnnDelegateHasCapability() if the capability is supported.
#define TfLiteQnnDelegateCapabilitySupported 1
/// Returned by TfLiteQnnDelegateHasCapability() if the capability is not
/// supported.
#define TfLiteQnnDelegateCapabilityNotSupported 0
// NOLINTEND(cppcoreguidelines-macro-usage)

/// Defines possible QNN Delegate capabilities that can be queried with
/// TfLiteQnnDelegateHasCapability(). Every enumerator has an explicitly pinned
/// value.
typedef enum TfLiteQnnDelegateCapability {  // NOLINT(modernize-use-using)
  kCapHtpRuntimeQuant = 0,
  kCapHtpRuntimeFp16 = 1,
  kCapGpuRuntime = 2,
  kCapDspRuntime = 3,
} TfLiteQnnDelegateCapability;

/// Create the QNN Delegate options structure and populate with default values.
///
/// Declared with an explicit `void` parameter list: in C (before C23) an empty
/// `()` declares a function without a prototype, which disables argument
/// checking. The meaning in C++ is unchanged.
///
/// \return A TfLiteQnnDelegateOptions value, returned by value.
QNN_DELEGATE_CAPI_EXPORT TfLiteQnnDelegateOptions
TfLiteQnnDelegateOptionsDefault(void);

/// Create the QNN Delegate with the specified options.
///
/// \param options Options used to configure the new delegate.
/// \return The created delegate. Pass it to TfLiteQnnDelegateDelete() once it
/// is no longer required.
///
/// NOTE(review): the behaviour for a null `options` pointer and the return
/// value on failure are not specified in this header — confirm.
QNN_DELEGATE_CAPI_EXPORT TfLiteDelegate* TfLiteQnnDelegateCreate(
    const TfLiteQnnDelegateOptions* options);

/// Delete the QNN Delegate once no longer required.
///
/// Note that this is not a thread-safe function, which might cause unexpected
/// behaviour when using it with \ref TfLiteQnnDelegateSetPerf, \ref
/// TfLiteQnnDelegateUpdateHtpPerfMode, \ref TfLiteQnnDelegateUpdateDspPerfMode,
/// or \ref TfLiteQnnDelegateDelete at the same time.
///
/// \param delegate The delegate to delete.
QNN_DELEGATE_CAPI_EXPORT void TfLiteQnnDelegateDelete(TfLiteDelegate* delegate);

/// Manually vote or release performance mode. "Vote" requests the hardware to
/// obey the performance mode setting as soon as possible. "Release" releases
/// the vote. Note that this API only works for the HTP/DSP backends with \ref
/// kHtpPerfCtrlManual or \ref kDspPerfCtrlManual.
///
/// \param delegate The delegate whose performance vote is changed.
/// \param action Whether to vote (kPerformanceVote) or release
/// (kPerformanceRelease).
/// \return true for success, false for failure.
QNN_DELEGATE_CAPI_EXPORT bool TfLiteQnnDelegateSetPerf(
    TfLiteDelegate* delegate, const TfLiteQnnDelegatePerformanceAction action);

/// Detect whether the capability is supported on the platform running QNN
/// Delegate.
///
/// Note that this is an experimental feature.
///
/// \param cap The capability to query.
/// \return TfLiteQnnDelegateCapabilitySupported if the capability is
/// supported, TfLiteQnnDelegateCapabilityNotSupported otherwise.
QNN_DELEGATE_CAPI_EXPORT TfLiteQnnDelegateCapabilityStatus
TfLiteQnnDelegateHasCapability(const TfLiteQnnDelegateCapability cap);

/// This API changes the performance mode of a created QNN Delegate on HTP
/// backend, returning `true` for the mode set correctly, `false` for any
/// failure.
///
/// It will perform a vote after a successful update. If the strategy of
/// performance controlling is **manual**, the new mode takes effect before this
/// API returns.
///
/// Note that this API cannot be called during graph invocation, and this is an
/// experimental feature.
///
/// \param delegate The delegate to update.
/// \param mode The new HTP performance mode.
QNN_DELEGATE_CAPI_EXPORT bool TfLiteQnnDelegateUpdateHtpPerfMode(
    TfLiteDelegate* delegate, const TfLiteQnnDelegateHtpPerformanceMode mode);

/// This API changes the performance mode of a created QNN Delegate on DSP
/// backend, returning `true` for the mode set correctly, `false` for any
/// failure.
///
/// \param delegate The delegate to update.
/// \param mode The new DSP performance mode.
///
/// NOTE(review): presumably subject to the same voting behaviour and
/// restrictions as TfLiteQnnDelegateUpdateHtpPerfMode() — confirm.
QNN_DELEGATE_CAPI_EXPORT bool TfLiteQnnDelegateUpdateDspPerfMode(
    TfLiteDelegate* delegate, const TfLiteQnnDelegateDspPerformanceMode mode);

/// Get QNN Delegate API version.
///
/// Declared with an explicit `void` parameter list: in C (before C23) an empty
/// `()` declares a function without a prototype, which disables argument
/// checking. The meaning in C++ is unchanged.
///
/// \return The version as a QnnDelegateApiVersion (major, minor, patch).
QNN_DELEGATE_CAPI_EXPORT QnnDelegateApiVersion
TfLiteQnnDelegateGetApiVersion(void);

/// Allocate specific tensors (usually graph inputs and outputs) on shared
/// memory. Users are responsible for requesting "enough" tensor bytes, and for
/// setting the alignment to kDefaultTensorAlignment. The function returns a
/// valid pointer if allocation is successful.
///
/// Note that this is an experimental feature.
///
/// \param bytes Number of bytes to allocate.
/// \param alignment Requested alignment; expected to be
/// kDefaultTensorAlignment.
/// \return A valid pointer on success. Release it with
/// TfLiteQnnDelegateFreeCustomMem().
///
/// NOTE(review): the return value on failure is not specified in this header
/// (presumably a null pointer) — confirm.
QNN_DELEGATE_CAPI_EXPORT void* TfLiteQnnDelegateAllocCustomMem(
    size_t bytes, size_t alignment);

/// Free the allocated shared memory.
///
/// Note that this is an experimental feature.
///
/// \param buffer_ptr Pointer to the shared memory to free.
/// NOTE(review): presumably this must be a pointer obtained from
/// TfLiteQnnDelegateAllocCustomMem() — confirm.
QNN_DELEGATE_CAPI_EXPORT void TfLiteQnnDelegateFreeCustomMem(void* buffer_ptr);

/// Structure of profiling result, as returned by
/// TfLiteQnnDelegateGetProfilingResult().
typedef struct {  // NOLINT(modernize-use-using)
  /// Buffer of the profiling result. It becomes invalid once
  /// TfLiteQnnDelegateClearProfilingResult() gets called.
  const uint8_t* buffer;
  /// Buffer length of profiling result in bytes
  uint32_t buffer_length;
} TfLiteQnnDelegateProfilingResult;

/// Get profiling result.
///
/// \param delegate The delegate whose profiling result is requested.
/// \return The profiling result. Its buffer becomes invalid once
/// TfLiteQnnDelegateClearProfilingResult() gets called.
QNN_DELEGATE_CAPI_EXPORT TfLiteQnnDelegateProfilingResult
TfLiteQnnDelegateGetProfilingResult(TfLiteDelegate* delegate);

/// Free the recorded profiling result. Any buffer previously obtained through
/// TfLiteQnnDelegateGetProfilingResult() becomes invalid after this call.
///
/// \param delegate The delegate whose profiling result is freed.
QNN_DELEGATE_CAPI_EXPORT void TfLiteQnnDelegateClearProfilingResult(
    TfLiteDelegate* delegate);

#ifdef __cplusplus
}
#endif  // __cplusplus

#endif  // TENSORFLOW_LITE_DELEGATES_QNN_QNN_TFLITE_DELEGATE_H_