Program Listing for File QnnHtpContext.h

Return to documentation for file (include/QNN/HTP/QnnHtpContext.h)

//==============================================================================
//
//  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
//  All rights reserved.
//  Confidential and Proprietary - Qualcomm Technologies, Inc.s
//
//==============================================================================

/**
 *  @file
 *  @brief QNN HTP component Context API.
 *
 *         The interfaces in this file work with the top level QNN
 *         API and supplements QnnContext.h for HTP backend
 */

#ifndef QNN_HTP_CONTEXT_H
#define QNN_HTP_CONTEXT_H

#include "QnnContext.h"

#ifdef __cplusplus
extern "C" {
#endif

//=============================================================================
// Macros
//=============================================================================

//=============================================================================
// Data Types
//=============================================================================

/**
 * @brief This enum provides different HTP context configuration
 *        options associated with QnnContext
 */
typedef enum {
  QNN_HTP_CONTEXT_CONFIG_OPTION_WEIGHT_SHARING_ENABLED               = 1,
  QNN_HTP_CONTEXT_CONFIG_OPTION_REGISTER_MULTI_CONTEXTS              = 2,
  QNN_HTP_CONTEXT_CONFIG_OPTION_FILE_READ_MEMORY_BUDGET              = 3,
  QNN_HTP_CONTEXT_CONFIG_OPTION_DSP_MEMORY_PROFILING_ENABLED         = 4,
  QNN_HTP_CONTEXT_CONFIG_OPTION_SHARE_RESOURCES                      = 5,
  QNN_HTP_CONTEXT_CONFIG_OPTION_IO_MEM_ESTIMATION                    = 6,
  QNN_HTP_CONTEXT_CONFIG_OPTION_PREPARE_ONLY                         = 7,
  QNN_HTP_CONTEXT_CONFIG_OPTION_INIT_ACCELERATION                    = 8,
  QNN_HTP_CONTEXT_CONFIG_OPTION_SKIP_VALIDATION_ON_BINARY_SECTION    = 9,
  QNN_HTP_CONTEXT_CONFIG_OPTION_SHARE_RESOURCES_OPTIMIZATION_TYPE    = 10,
  QNN_HTP_CONTEXT_CONFIG_OPTION_USE_EXTENDED_UDMA                    = 11,
  QNN_HTP_CONTEXT_CONFIG_OPTION_REGISTER_CONCURRENT_RESOURCE_SHARING = 12,
  QNN_HTP_CONTEXT_CONFIG_OPTION_LORA_WEIGHT_SHARING_ENABLED          = 13,
  QNN_HTP_CONTEXT_CONFIG_OPTION_RESERVED_14                          = 14,
  QNN_HTP_CONTEXT_CONFIG_OPTION_RESERVED_15                          = 15,
  QNN_HTP_CONTEXT_CONFIG_OPTION_UNKNOWN                              = 0x7fffffff
} QnnHtpContext_ConfigOption_t;

typedef struct {
  // Handle referring to the first context associated to a group. When a new
  // group is to be registered, the following value must be 0.
  Qnn_ContextHandle_t firstGroupHandle;
  // Max spill-fill buffer to be allocated for the group of context in bytes.
  // The value that is passed during the registration of the first context to
  // a group is taken. Subsequent configuration of this value is disregarded.
  uint64_t maxSpillFillBuffer;
} QnnHtpContext_GroupRegistration_t;

// This enum is supported only with the QnnContext_createFromBinaryListAsync API, when
// shareResources is true; otherwise, it is ignored. This enumeration allows users to specify how
// graphs are going to be executed, providing QNN with hints for optimizing memory.
typedef enum {
  // Default value if no user input is provided.
  // This type is used for sequential graph execution, optimizing both VA and memory.
  SEQUENTIAL_WITH_VA_OPTIMIZATION,
  // This type is used for sequential graph execution, optimizing memory.
  SEQUENTIAL_WITHOUT_VA_OPTIMIZATION,
  // This type is used for concurrent resource sharing, optimizing memory by sharing
  // resources across contexts with the same priority level.
  CONCURRENT_OPTIMIZATION,
} QnnHtpContext_ShareResourcesOptimizationType_t;

//=============================================================================
// Public Functions
//=============================================================================

//------------------------------------------------------------------------------
//   Implementation Definition
//------------------------------------------------------------------------------

// clang-format off

/**
 * @brief        Structure describing the set of configurations supported by context.
 *               Objects of this type are to be referenced through QnnContext_CustomConfig_t.
 *
 *               The struct has two fields - option and a union of config values
 *               Based on the option corresponding item in the union can be used to specify
 *               config.
 *
 *               Below is the Map between QnnHtpContext_CustomConfig_t and config value
 *
 *               \verbatim embed:rst:leading-asterisk
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               | #  | Config Option                                                       | Configuration Struct/value                       |
 *               +====+=====================================================================+==================================================+
 *               | 1  | QNN_HTP_CONTEXT_CONFIG_OPTION_WEIGHT_SHARING_ENABLED                | bool                                             |
 *               +====+=====================================================================+==================================================+
 *               | 2  | QNN_HTP_CONTEXT_CONFIG_OPTION_REGISTER_MULTI_CONTEXTS               | QnnHtpContext_GroupRegistration_t                |
 *               +====+=====================================================================+==================================================+
 *               | 3  | QNN_HTP_CONTEXT_CONFIG_OPTION_FILE_READ_MEMORY_BUDGET               | uint64_t                                         |
 *               +====+=====================================================================+==================================================+
 *               | 4  | QNN_HTP_CONTEXT_CONFIG_OPTION_DSP_MEMORY_PROFILING_ENABLED          | bool                                             |
 *               +====+=====================================================================+==================================================+
 *               | 5  | QNN_HTP_CONTEXT_CONFIG_OPTION_SHARE_RESOURCES                       | bool                                             |
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               | 6  | QNN_HTP_CONTEXT_CONFIG_OPTION_IO_MEM_ESTIMATION                     | bool                                             |
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               | 7  | QNN_HTP_CONTEXT_CONFIG_OPTION_PREPARE_ONLY                          | bool                                             |
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               | 8  | QNN_HTP_CONTEXT_CONFIG_OPTION_INIT_ACCELERATION                     | bool                                             |
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               | 9  | QNN_HTP_CONTEXT_CONFIG_OPTION_SKIP_VALIDATION_ON_BINARY_SECTION     | bool                                             |
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               | 10 | QNN_HTP_CONTEXT_CONFIG_OPTION_SHARE_RESOURCES_OPTIMIZATION_TYPE     | QnnHtpContext_ShareResourcesOptimizationType_t   |
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               | 11 | QNN_HTP_CONTEXT_CONFIG_OPTION_USE_EXTENDED_UDMA                     | bool                                             |
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               | 12 | QNN_HTP_CONTEXT_CONFIG_OPTION_REGISTER_CONCURRENT_RESOURCE_SHARING  | QnnHtpContext_GroupRegistration_t                |
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               | 13 | QNN_HTP_CONTEXT_CONFIG_OPTION_LORA_WEIGHT_SHARING_ENABLED           | bool                                             |
 *               +----+---------------------------------------------------------------------+--------------------------------------------------+
 *               \endverbatim
 */
typedef struct QnnHtpContext_CustomConfig {
  QnnHtpContext_ConfigOption_t option;
  union UNNAMED {
    // This field sets the weight sharing which is by default false
    bool weightSharingEnabled;
    QnnHtpContext_GroupRegistration_t groupRegistration;
    // - Init time may be impacted depending the value set below
    // - Value should be grather than 0 and less than or equal to the file size
    //    - If set to 0, the feature is not utilized
    //    - If set to greater than file size, min(fileSize, fileReadMemoryBudgetInMb) is used
    // - As an example, if value 2 is passed, it would translate to (2 * 1024 * 1024) bytes
    uint64_t fileReadMemoryBudgetInMb;
    bool dspMemoryProfilingEnabled;
    // This field enables resource optimization. When it is set to true optimizations are
    // done based on QnnHtpContext_ShareResourcesOptimizationType_t setting.
    // Note This configuration option is only supported when using QnnContext_createFromBinaryListAsync API.
    bool shareResources;
    // This field enables I/O memory estimation during QnnContext_createFromBinary API when multiple
    // PDs are available. When enabled, it estimates the total size of the I/O tensors required by
    // the context to ensure sufficient space on the PD before deserialization. This feature helps
    // with memory registration failures in large models.
    // Note that enabling this feature increases peak RAM usage during context initialization phase
    // in QnnContext_createFromBinary, but sustained RAM remains unaffected.
    bool ioMemEstimation;
    // This field enables model preparation without mapping its content on the DSP side. It is
    // useful when a model needs to be prepared on the device but executed through a serialized
    // binary method. This prevents extra mapping onto the DSP VA space. Set this flag only when
    // creating the context.
    bool isPrepareOnly;
    // This field enables initialization acceleration, which is disabled by default.
    // If set to true, the DSP will utilize all hardware threads to accelerate deserialization.
    // It is not recommended to execute graphs simultaneously, as this will significantly degrade
    // performance.
    // Note that this feature may not be effective for small graphs with a few number of ops.
    bool initAcceleration;
    // This field enables crc32 check skip in Lora super adapter apply, which is disabled by default.
    // If set to true, crc32 check for non-base adapter in super adapter apply use case will be
    // skipped to improve time cost.
    // Note that base adapter in super adaper never do crc32 check, therefore, their apply time cost
    // won't improve by turning this config option on.
    bool skipValidationOnBinarySection;
    // If shareResources is true:
    // shareResOptType is read. If no value is set by the user,
    // the default value of QnnHtpContext_ShareResourcesOptimizationType_t is used.
    // If shareResources is false:
    // shareResOptType is ignored.
    // Note: This configuration option is only supported when using the QnnContext_createFromBinaryListAsync API.
    QnnHtpContext_ShareResourcesOptimizationType_t shareResOptType;
    // This field enables preparing graphs, associated with this context, with far-mapping enabled so that weights
    // and spill/fill buffer are mapped to the far region of the DSP which is helpful if PD's limited VA space is
    // exhausted. Total RAM usage may increase if used together with shared weights. Only available for Hexagon
    // arch v81 and above.
    bool useExtendedUdma;
    // This field enables concurrent resource sharing among graphs with the same priority level
    // during the QnnContext_createFromBinary API on devices that support this capability.
    QnnHtpContext_GroupRegistration_t concurrentGroupRegistration;
    // This field sets the lora weight sharing. When it is set to true, one additional replaceable weight blob
    // that contains the RP shared by all graphs will be generated and maintained. It is disabled by default.
    bool loraWeightSharingEnabled;
  };
} QnnHtpContext_CustomConfig_t;

/// QnnHtpContext_CustomConfig_t initializer macro
#define QNN_HTP_CONTEXT_CUSTOM_CONFIG_INIT            \
  {                                                   \
    QNN_HTP_CONTEXT_CONFIG_OPTION_UNKNOWN, /*option*/ \
    {                                                 \
      false                          /*weightsharing*/\
    }                                                 \
  }

/**
 * @brief Structure describing the set of properties supported by context.
 *        Objects of this type are to be referenced through QnnContext_CustomProperty_t.
 *        Used by QnnContext_getProperty.
 */
typedef enum {
  // get the alignment requirement of persistent buffers
  QNN_HTP_CONTEXT_GET_PROP_BUFFER_START_ALIGNMENT = 1,
  // get the size requirement of spill/fill buffer
  QNN_HTP_CONTEXT_GET_PROP_MAX_SPILLFILL_BUFFER_SIZE = 2,
  // get the size requirement of persistent weights buffer
  QNN_HTP_CONTEXT_GET_PROP_WEIGHTS_BUFFER_SIZE = 3,
  QNN_HTP_CONTEXT_GET_PROP_RESERVED_4 = 4,
  QNN_HTP_CONTEXT_GET_PROP_RESERVED_5 = 5,
  // Unused, present to ensure 32 bits.
  QNN_HTP_CONTEXT_GET_PROP_UNDEFINED = 0x7fffffff
} QnnHtpContext_GetPropertyOption_t;

// used by QnnContext_getProperty
typedef struct {
  QnnHtpContext_GetPropertyOption_t option;
  union UNNAMED {
    uint64_t bufferStartAlignment;
    uint64_t spillfillBufferSize;
    uint64_t weightsBufferSize;
  };
} QnnHtpContext_CustomProperty_t;

// clang-format off
/// QnnHtpContext_CustomProperty_t initializer macro
#define QNN_HTP_CONTEXT_CUSTOM_PROPERTY_INIT                         \
  {                                                                  \
    QNN_HTP_CONTEXT_GET_PROP_UNDEFINED,                 /*option*/   \
    0                                         /*scratchBufferSize*/  \
  }
// clang-format on

// clang-format on
#ifdef __cplusplus
}  // extern "C"
#endif

#endif