//==============================================================================
//
//  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
//  All Rights Reserved.
//  Confidential and Proprietary - Qualcomm Technologies, Inc.
//
//==============================================================================

#include <chrono>
#if defined(__GNUC__) && !defined(__clang__)
#include <cstring>
#endif
#ifndef _WIN32
    #include <sys/mman.h>
#endif

#include "dlwrap.hpp"
#include "QnnApi.hpp"

#ifdef SPILLFILL
    #include "QnnHtpContext.h"
    #include "QnnHtpCommon.h"
#endif

// Destructor: releases, in this order, the graphs, the context(s), the
// profile handle, the device, the backend log, the backend itself, the
// dynamically loaded libraries and finally the context binary buffers.
QnnApi::~QnnApi() {
    // QNN_DEBUG("Destroying Performance");
    // if (true != destroyPerformance()) {
    //     QNN_DEBUG("Could not destroy Performance");
    // }

    QNN_DEBUG("Freeing Graphs");
    if (!freeGraphs()) {
        QNN_DEBUG("Could not free Graphs");
    }

    // The context may already have been released explicitly.
    if (m_isContextCreated) {
        QNN_DEBUG("Freeing Context");
        if (!freeContext()) {
            QNN_DEBUG("Could not free context");
        }
    }

    if (m_profileBackendHandle) {
        QNN_DEBUG("Freeing profile handle");
        const auto profileStatus = m_qnnInterface.profileFree(m_profileBackendHandle);
        if (QNN_PROFILE_NO_ERROR != profileStatus) {
            QNN_ERROR("Could not free QNN HTP backend profile handle.");
        }
    }

    QNN_DEBUG("Freeing Device");
    if (getDeviceStatus() && !freeDevice()) {
        QNN_ERROR("Device Free failure");
    }

    QNN_DEBUG("Terminating Logging");
    if (m_isLogInitialized) {
        terminateLog();
        m_isLogInitialized = false;
    }

    if (m_isBackendInitialized) {
        QNN_DEBUG("Terminating Backend");
        if (!terminateBackend()) {
            QNN_DEBUG("Could not terminate backend");
        }
    }

    // The HTP backend library is intentionally left loaded: it runs its own
    // cleanup routines later.
    if (m_backendLibraryHandle && (QNN_BACKEND_ID_HTP != m_backendId)) {
        QNN_DEBUG("Closing Backend Lib Handle");
        dlclose(m_backendLibraryHandle);
    }

    if (m_libModelHandle) {
        QNN_DEBUG("Closing Model Lib Handle");
        dlclose(m_libModelHandle);
    }

    // Release every context binary buffer that was registered for cleanup,
    // using the release call that matches how it was obtained.
    for (auto& [binBuffer, binSize] : m_contextBinBuffersToBeCleared) {
        QNN_DEBUG("Free context bin buffer %p of size %lu", binBuffer, binSize);
        if (!m_mmapContextBins) {
            delete[] binBuffer;
            continue;
        }
#ifndef _WIN32
        if (munmap(binBuffer, binSize)) {
            QNN_ERROR("Failed to unmap buffer for context");
        }
#endif
    }
    m_contextBinBuffersToBeCleared.clear();
}

// Builds a heap-allocated array of context config pointers from the given options.
//
// @param configs             [out] receives the array; release it with freeContextConfigs().
// @param contextConfigCount  [out] number of valid entries written to *configs.
// @param contextPriority     UNDEFINED emits an "undefined" config entry, DEFAULT emits
//                            nothing, any other value emits a priority config.
// @param graphSwitching      when true, adds the memory-limit-hint and persistent-binary configs.
// @param execSelectGraphs    graph names to enable; the stored C strings point into these
//                            std::string objects, so they must outlive the returned configs.
// @param loadSelectGraphs    when true (and execSelectGraphs is non-empty) adds an
//                            enable-graphs config.
// @return true on success. On failure nothing is leaked, *configs is nullptr and
//         contextConfigCount is 0.
bool QnnApi::getContextConfigs(
        QnnContext_Config_t***          configs,
        uint32_t&                       contextConfigCount,
        Qnn_Priority_t                  contextPriority,
        bool                            graphSwitching,
        const std::vector<std::string>& execSelectGraphs,
        bool                            loadSelectGraphs
) {
    if (nullptr == configs) {
        QNN_ERROR("Null output pointer passed to getContextConfigs");
        return false;
    }
    *configs           = nullptr;
    contextConfigCount = 0;

    std::vector<QnnContext_Config_t*> pendingConfigs;
    const char**                      graphNames = nullptr;

    // Frees everything allocated so far; used on every failure path.
    auto discardAll = [&pendingConfigs, &graphNames]() {
        for (QnnContext_Config_t* cfg : pendingConfigs) {
            free(cfg);
        }
        pendingConfigs.clear();
        free(graphNames);
        graphNames = nullptr;
    };

    // Allocates one zero-initialized config with the given option and queues it.
    // Returns nullptr if the allocation failed.
    auto appendConfig = [&pendingConfigs](QnnContext_ConfigOption_t option) -> QnnContext_Config_t* {
        auto* cfg = static_cast<QnnContext_Config_t*>(calloc(1, sizeof(QnnContext_Config_t)));
        if (nullptr != cfg) {
            cfg->option = option;
            pendingConfigs.push_back(cfg);
        }
        return cfg;
    };

    auto allocationFailed = [&discardAll]() {
        QNN_ERROR("Could not allocate memory for context config");
        discardAll();
        return false;
    };

    if (contextPriority == QNN_PRIORITY_UNDEFINED) {
        if (nullptr == appendConfig(QnnContext_ConfigOption_t::QNN_CONTEXT_CONFIG_UNDEFINED)) {
            return allocationFailed();
        }
    } else if (contextPriority != QNN_PRIORITY_DEFAULT) {
        QnnContext_Config_t* priorityConfig =
                appendConfig(QnnContext_ConfigOption_t::QNN_CONTEXT_CONFIG_OPTION_PRIORITY);
        if (nullptr == priorityConfig) {
            return allocationFailed();
        }
        priorityConfig->priority = contextPriority;
    }

    if (loadSelectGraphs && !execSelectGraphs.empty()) {
        // One extra slot for the terminating nullptr; calloc zero-fills it.
        graphNames = static_cast<const char**>(
                calloc(execSelectGraphs.size() + 1, sizeof(const char*))
        );
        if (nullptr == graphNames) {
            return allocationFailed();
        }
        for (size_t i = 0; i < execSelectGraphs.size(); ++i) {
            graphNames[i] = execSelectGraphs[i].c_str();
        }

        QnnContext_Config_t* enableGraphsConfig =
                appendConfig(QnnContext_ConfigOption_t::QNN_CONTEXT_CONFIG_ENABLE_GRAPHS);
        if (nullptr == enableGraphsConfig) {
            return allocationFailed();
        }
        // From here on the name array is owned by the config and is released
        // by freeContextConfigs().
        enableGraphsConfig->enableGraphs = graphNames;
    }

    if (graphSwitching) {
        QnnContext_Config_t* memoryHintConfig =
                appendConfig(QnnContext_ConfigOption_t::QNN_CONTEXT_CONFIG_MEMORY_LIMIT_HINT);
        if (nullptr == memoryHintConfig) {
            return allocationFailed();
        }
        memoryHintConfig->memoryLimitHint = 1024;

        QnnContext_Config_t* persistentConfig =
                appendConfig(QnnContext_ConfigOption_t::QNN_CONTEXT_CONFIG_PERSISTENT_BINARY);
        if (nullptr == persistentConfig) {
            return allocationFailed();
        }
        persistentConfig->isPersistentBinary = 1;
    }

    // Always allocate at least one slot: malloc(0)/calloc(0, ...) may legally
    // return nullptr, which must not be mistaken for an allocation failure.
    const size_t slotCount = pendingConfigs.empty() ? 1 : pendingConfigs.size();
    auto**       contextConfigPtrs =
            static_cast<QnnContext_Config_t**>(calloc(slotCount, sizeof(QnnContext_Config_t*)));
    if (nullptr == contextConfigPtrs) {
        QNN_ERROR("Could not allocate memory for allContextConfigs");
        discardAll();
        return false;
    }

    for (size_t i = 0; i < pendingConfigs.size(); i++) {
        contextConfigPtrs[i] = pendingConfigs[i];
    }

    contextConfigCount = static_cast<uint32_t>(pendingConfigs.size());
    *configs           = contextConfigPtrs;

    return true;
}

// Concatenates the context configs followed by the custom configs into one
// newly allocated, nullptr-terminated pointer array.
//
// *allCustomContextConfigs receives the array (or nullptr when both counts
// are zero). Only the array itself is allocated here; the entries are the
// caller's pointers. Returns false if the allocation fails, in which case
// the output pointer is left untouched.
bool QnnApi::mergeAllContextConfigs(
        QnnContext_Config_t*** allCustomContextConfigs,
        QnnContext_Config_t**  customConfigs,
        QnnContext_Config_t**  contextConfigs,
        uint32_t               customConfigCount,
        uint32_t               contextConfigCount
) {
    const uint32_t totalCount = contextConfigCount + customConfigCount;
    if (totalCount == 0) {
        *allCustomContextConfigs = nullptr;
        return true;
    }

    // calloc zero-fills, so the extra trailing slot acts as the terminator.
    auto** merged = (QnnContext_Config_t**)calloc((totalCount + 1), sizeof(QnnContext_Config_t*));
    if (merged == nullptr) {
        QNN_ERROR("Could not allocate memory for allContextConfigs");
        return false;
    }

    QnnContext_Config_t** cursor = merged;
    for (uint32_t i = 0; i < contextConfigCount; ++i) {
        *cursor++ = contextConfigs[i];
    }
    for (uint32_t i = 0; i < customConfigCount; ++i) {
        *cursor++ = customConfigs[i];
    }

    *allCustomContextConfigs = merged;
    return true;
}

// Releases an array of context configs: each entry, the graph-name array
// attached to enable-graphs entries, and the array itself. A null array is
// accepted and treated as nothing to do. Always returns true.
bool QnnApi::freeContextConfigs(QnnContext_Config_t** contextConfigs, uint32_t contextConfigCount) {
    if (nullptr == contextConfigs) {
        return true;
    }

    for (uint32_t idx = 0; idx < contextConfigCount; ++idx) {
        QnnContext_Config_t* entry = contextConfigs[idx];
        if (QNN_CONTEXT_CONFIG_ENABLE_GRAPHS == entry->option) {
            // The name array is a separate allocation hanging off the config.
            free((const char**)entry->enableGraphs);
        }
        free(entry);
    }
    free(contextConfigs);

    return true;
}

// Applies the given graph configs to graphHandle through graphSetConfig.
//
// The configs are copied into a temporary nullptr-terminated array before
// being handed to the backend. Returns false when there are no configs to
// set or when the backend rejects them.
bool QnnApi::setGraphConfigsBeforeExecute(
        Qnn_GraphHandle_t   graphHandle,
        QnnGraph_Config_t** graphConfigs,
        uint32_t            configCount
) {
    const bool nothingToSet = (nullptr == graphConfigs) || (0u == configCount);
    if (nothingToSet) {
        QNN_ERROR("No graph configs to set");
        return false;
    }

    // Copy the entries, then append the terminating nullptr.
    std::vector<const QnnGraph_Config_t*> terminatedConfigs(
            graphConfigs, graphConfigs + configCount
    );
    terminatedConfigs.push_back(nullptr);

    const auto setStatus = m_qnnInterface.graphSetConfig(graphHandle, terminatedConfigs.data());
    if (QNN_SUCCESS != setStatus) {
        QNN_ERROR("Failed to set graph configs.");
        return false;
    }

    return true;
}

bool QnnApi::getQnnInterface(std::string backendPath) {

    QnnInterfaceGetProvidersFn_t getInterfaceProviders{nullptr};

    m_backendLibraryHandle = dlopen(backendPath.c_str(), RTLD_NOW);
    if (nullptr == m_backendLibraryHandle) {
        QNN_ERROR("Unable to load backend. dlerror(): %s", dlerror());
        return false;
    }

    // Get QNN Interface
    getInterfaceProviders = (QnnInterfaceGetProvidersFn_t
    )dlsym(m_backendLibraryHandle, "QnnInterface_getProviders");
    if (nullptr == getInterfaceProviders) {
        return false;
    }

    uint32_t numProviders{0};
    QnnInterface_t** interfaceProviders{nullptr};
    if (QNN_SUCCESS !=
        getInterfaceProviders((const QnnInterface_t***)&interfaceProviders, &numProviders)) {
        QNN_ERROR("Failed to get interface providers.");
        return false;
    }

    if (nullptr == interfaceProviders) {
        QNN_ERROR("Failed to get interface providers: null interface providers received.");
        return false;
    }
    if (0u == numProviders) {
        QNN_ERROR("Failed to get interface providers: 0 interface providers.");
        return false;
    }

    bool foundValidInterface{false};
    for (size_t pIdx = 0; pIdx < numProviders; pIdx++) {
        const Qnn_ApiVersion_t& apiVersion = interfaceProviders[pIdx]->apiVersion;
        if ((QNN_API_VERSION_MAJOR == apiVersion.coreApiVersion.major) &&
            (QNN_API_VERSION_MINOR <= apiVersion.coreApiVersion.minor)) {
            foundValidInterface = true;
            m_qnnInterface = interfaceProviders[pIdx]->QNN_INTERFACE_VER_NAME;
            m_backendId = interfaceProviders[pIdx]->backendId;
            break;
        }
    }

    if (!foundValidInterface) {
        QNN_ERROR("Unable to find a valid interface.");
        m_backendLibraryHandle = nullptr;
        return false;
    }

    return true;
}

bool QnnApi::getQnnSystemInterface(std::string systemLibraryPath) {
    QnnSystemInterfaceGetProvidersFn_t getSystemInterfaceProviders{nullptr};

    void* systemLibraryHandle = dlopen(systemLibraryPath.c_str(), RTLD_NOW);
    if (nullptr == systemLibraryHandle) {
        QNN_ERROR("Unable to load system library. dlerror(): %s", dlerror());
        return false;
    }

    // Get QNN System Interface
    getSystemInterfaceProviders = (QnnSystemInterfaceGetProvidersFn_t
    )dlsym(systemLibraryHandle, "QnnSystemInterface_getProviders");
    if (nullptr == getSystemInterfaceProviders) {
        return false;
    }

    uint32_t numProviders{0};
    QnnSystemInterface_t** systemInterfaceProviders{nullptr};
    if (QNN_SUCCESS !=
        getSystemInterfaceProviders(
                (const QnnSystemInterface_t***)&systemInterfaceProviders, &numProviders
        )) {
        QNN_ERROR("Failed to get system interface providers.");
        return false;
    }
    if (nullptr == systemInterfaceProviders) {
        QNN_ERROR(
                "Failed to get system interface providers: null system interface providers received."
        );
        return false;
    }
    if (0 == numProviders) {
        QNN_ERROR("Failed to get system interface providers: 0 system interface providers.");
        return false;
    }

    bool foundValidSystemInterface{false};
    for (size_t pIdx = 0; pIdx < numProviders; pIdx++) {
        const Qnn_Version_t& systemApiVersion = systemInterfaceProviders[pIdx]->systemApiVersion;
        if (QNN_SYSTEM_API_VERSION_MAJOR == systemApiVersion.major &&
            QNN_SYSTEM_API_VERSION_MINOR <= systemApiVersion.minor) {
            foundValidSystemInterface = true;
            m_qnnSystemInterface = systemInterfaceProviders[pIdx]->QNN_SYSTEM_INTERFACE_VER_NAME;
            break;
        }
    }
    if (!foundValidSystemInterface) {
        QNN_ERROR("Unable to find a valid system interface.");
        return false;
    }

    return true;
}

bool QnnApi::loadModel(std::string model_path) {
    const char* dlsym_error;

    dlerror();
    m_libModelHandle = dlopen(model_path.c_str(), RTLD_NOW);
    if (nullptr == m_libModelHandle) {
        QNN_ERROR("Unable to load model. dlerror(): %s", dlerror());
        return false;
    }

    // Currently model Prefix is fixed. If model was prepared with
    // custom prefix, we need to change this.
    std::string modelPrefix = "QnnModel";

    std::string modelPrepareFunc = modelPrefix + "_composeGraphs";
    m_composeGraphsFnHandle =
            (ComposeGraphsFnHandleType_t)dlsym(m_libModelHandle, modelPrepareFunc.c_str());
    dlsym_error = dlerror();
    if (dlsym_error || nullptr == m_composeGraphsFnHandle) {
        m_composeGraphsFnHandle           = nullptr;
        std::string genaiModelPrepareFunc = "QnnModel_GenAI_composeGraphs";
        m_genaiComposeGraphsFnHandle      = (GenAIComposeGraphsFnHandleType_t
        )dlsym(m_libModelHandle, genaiModelPrepareFunc.c_str());
        dlsym_error                       = dlerror();
        if (dlsym_error || nullptr == m_genaiComposeGraphsFnHandle) {
            QNN_ERROR("Did not find QnnModel_composeGraph function: %s", dlsym_error);
            return false;
        }
    }

    std::string modelFreeFunc = modelPrefix + "_freeGraphsInfo";
    m_freeGraphInfoFnHandle =
            (FreeGraphInfoFnHandleType_t)dlsym(m_libModelHandle, modelFreeFunc.c_str());
    dlsym_error = dlerror();
    if (dlsym_error || nullptr == m_freeGraphInfoFnHandle) {
        QNN_ERROR("Did not find QnnModel_freeGraphsInfo function: %s", dlsym_error);
        return false;
    }

    return true;
}

// Log callback handed to the backend: formats one message as
// "QNN: [<level>] time=<timestamp>:<message>" and prints it to stdout.
//
// @param fmt        printf-style format string supplied by the backend.
// @param level      severity of the message.
// @param timestamp  backend timestamp, printed verbatim.
// @param args       arguments matching fmt.
// Messages longer than the internal 1024-byte buffer are truncated.
void QnnApi::qnnLogCallback(
        const char*    fmt,
        QnnLog_Level_t level,
        uint64_t       timestamp,
        va_list        args
) {
    char        buffer[1024] = "";
    const char* levelStr     = "";
    switch (level) {
    case QNN_LOG_LEVEL_ERROR:
        levelStr = " ERROR ";
        break;
    case QNN_LOG_LEVEL_WARN:
        levelStr = "WARNING";
        break;
    case QNN_LOG_LEVEL_INFO:
        levelStr = "  INFO ";
        break;
    case QNN_LOG_LEVEL_DEBUG:
        levelStr = " DEBUG ";
        break;
    case QNN_LOG_LEVEL_VERBOSE:
        levelStr = "VERBOSE";
        break;
    case QNN_LOG_LEVEL_MAX:
        levelStr = "UNKNOWN";
        break;
    }

    // unsigned long is only 32 bits wide on some targets (e.g. 64-bit Windows),
    // so print the 64-bit timestamp through unsigned long long.
    int prefixLen = snprintf(
            buffer,
            sizeof(buffer),
            "QNN: [%s] time=%llu:",
            levelStr,
            (unsigned long long)timestamp
    );

    // snprintf reports the untruncated length (or a negative value on error);
    // clamp it so it is always a valid offset into the buffer.
    if (prefixLen < 0) {
        prefixLen = 0;
    } else if ((size_t)prefixLen >= sizeof(buffer)) {
        prefixLen = (int)sizeof(buffer) - 1;
    }

    if (nullptr != fmt) {
        vsnprintf(buffer + prefixLen, sizeof(buffer) - (size_t)prefixLen, fmt, args);
    }
    printf("%s", buffer);
}

// Creates the backend log handle (m_logHandle) at the requested level.
//
// When debug_qnn is set, backend messages are routed to qnnLogCallback;
// otherwise no callback is passed. Failing to create the log only produces a
// warning, so this function always returns true.
bool QnnApi::initializeLogging(const QnnLog_Level_t& logLevel, bool debug_qnn) {
    if (nullptr == m_qnnInterface.logCreate) {
        QNN_WARN("Logging not available in the backend.");
        return true;
    }

    QnnLog_Callback_t logCallback = nullptr;
    if (debug_qnn) {
        logCallback = QnnApi::qnnLogCallback;
    }

    QNN_DEBUG(
            "Initializing logging in the backend. Callback: [%p], Log Level: [%d]",
            logCallback,
            logLevel
    );

    const auto createStatus = m_qnnInterface.logCreate(logCallback, logLevel, &m_logHandle);
    if (QNN_SUCCESS != createStatus) {
        QNN_WARN("Unable to initialize logging in the backend.");
    }
    // The flag is raised whether or not logCreate succeeded.
    m_isLogInitialized = true;

    return true;
}

void QnnApi::terminateLog() {
    // Terminate logging in the backend
    if (nullptr != m_qnnInterface.logFree && nullptr != m_logHandle) {
        if (QNN_SUCCESS != m_qnnInterface.logFree(m_logHandle)) {
            QNN_WARN("Unable to terminate logging in the backend.");
        }
    }
}

// Creates and initializes the backend extensions object from the given
// configuration and performance profile. On success the object is stored in
// m_backendExtensions; on failure the member is left unchanged and false is
// returned.
bool QnnApi::initializeBackendExtensions(
        BackendExtensionsConfigs backendExtensionsConfig,
        PerfProfile              parsedPerfProfile,
        bool                     debug_qnn
) {
    auto* rawExtensions = new BackendExtensions(
            backendExtensionsConfig, m_backendLibraryHandle, parsedPerfProfile, nullptr, debug_qnn
    );
    std::unique_ptr<BackendExtensions> extensions(rawExtensions);

    if (!extensions) {
        QNN_ERROR("Unable to create backend extensions object.");
        return false;
    }

    const bool initialized = extensions->initialize();
    if (!initialized) {
        QNN_ERROR("Unable to initialize backend extensions.");
        return false;
    }

    // Only publish the object once it is fully initialized.
    m_backendExtensions = std::move(extensions);
    return true;
}

// Initialize a QnnBackend.
bool QnnApi::initializeBackend() {
    if (nullptr == m_qnnInterface.backendCreate) {
        QNN_ERROR("BackendCreate API is not supported for this backend");
        return false;
    }

    QnnBackend_Config_t** customConfigs{nullptr};
    uint32_t              customConfigCount{0};
    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->beforeBackendInitialize(
                    &customConfigs, &customConfigCount
            )) {
            QNN_ERROR("Extensions Failure in beforeBackendInitialize()");
            return false;
        }
    }
    QnnBackend_Config_t** allBackendConfigs{nullptr};
    if ((m_backendConfigCount + customConfigCount) > 0) {
        allBackendConfigs = (QnnBackend_Config_t**)calloc(
                (m_backendConfigCount + customConfigCount + 1), sizeof(QnnBackend_Config_t*)
        );
        if (nullptr == allBackendConfigs) {
            QNN_ERROR("Could not allocate memory for allBackendConfigs");
            return false;
        }
        for (size_t cnt = 0; cnt < m_backendConfigCount; cnt++) {
            allBackendConfigs[cnt] = m_backendConfigs[cnt];
        }
        for (size_t cnt = 0; cnt < customConfigCount; cnt++) {
            allBackendConfigs[cnt + m_backendConfigCount] = customConfigs[cnt];
        }
    }

    auto returnStatus = m_qnnInterface.backendCreate(
            m_logHandle, (const QnnBackend_Config_t**)allBackendConfigs, &m_backendHandle
    );
    if (QNN_SUCCESS != returnStatus) {
        QNN_ERROR(
                "Could not initialize backend due to error = %llu", (unsigned long long)returnStatus
        );
        if (allBackendConfigs) {
            free(allBackendConfigs);
        }
        return false;
    }
    QNN_DEBUG("Initialize Backend Returned Status = %llu", (unsigned long long)returnStatus);

    m_isBackendInitialized = true;
    if (allBackendConfigs) {
        free(allBackendConfigs);
    }

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->afterBackendInitialize()) {
            QNN_ERROR("Extensions Failure in afterBackendInitialize()");
            return false;
        }
    }

    return true;
}

// Terminate the backend after done.
// Frees the backend handle, calling the extensions' before/after terminate
// hooks around it. A failing backendFree is only logged; a failing hook
// aborts with false. m_isBackendInitialized is cleared once the "before"
// hook has passed.
bool QnnApi::terminateBackend() {
    if (m_backendExtensions && m_backendExtensions->interface() &&
        !m_backendExtensions->interface()->beforeBackendTerminate()) {
        QNN_ERROR("Extensions Failure in beforeBackendTerminate()");
        return false;
    }

    const bool canFreeBackend = m_isBackendInitialized && (nullptr != m_qnnInterface.backendFree);
    if (canFreeBackend) {
        QNN_DEBUG("Freeing backend");
        const auto freeStatus = m_qnnInterface.backendFree(m_backendHandle);
        if (QNN_BACKEND_NO_ERROR != freeStatus) {
            QNN_ERROR("Could not free backend");
        }
    }
    m_isBackendInitialized = false;

    if (m_backendExtensions && m_backendExtensions->interface() &&
        !m_backendExtensions->interface()->afterBackendTerminate()) {
        QNN_ERROR("Extensions Failure in afterBackendTerminate()");
        return false;
    }

    return true;
}

bool QnnApi::createDevice() {
    QnnDevice_Config_t** deviceConfigs{nullptr};
    uint32_t             configCount{0};

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->beforeCreateDevice(&deviceConfigs, &configCount)) {
            QNN_ERROR("Extensions Failure in beforeCreateDevice()");
            return false;
        }
    }
    std::vector<const QnnDevice_Config_t*> deviceConfigPointers(configCount + 1, nullptr);
    for (size_t idx = 0u; idx < configCount; idx++) {
        deviceConfigPointers[idx] = deviceConfigs[idx];
    }
    if (nullptr != m_qnnInterface.deviceCreate) {
        auto qnnStatus = m_qnnInterface.deviceCreate(
                m_logHandle, deviceConfigPointers.data(), &m_deviceHandle
        );
        if (QNN_SUCCESS != qnnStatus) {
            if (QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE == qnnStatus) {
                QNN_WARN("Device feature unsupported");
            } else {
                QNN_ERROR("Failed to create device: %lu", (unsigned long)qnnStatus);
                return false;
            }
        }
    }
    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->afterCreateDevice()) {
            QNN_ERROR("Extensions Failure in afterCreateDevice()");
            return false;
        }
    }
    return true;
}

// Frees the device handle, calling the extensions' before/after hooks around
// it.
//
// A backend that reports the device feature as unsupported only triggers a
// warning; any other error, or a failing extension hook, returns false. If
// the backend exposes no deviceFree entry point the call is skipped.
bool QnnApi::freeDevice() {
    if (m_backendExtensions && m_backendExtensions->interface() &&
        !m_backendExtensions->interface()->beforeFreeDevice()) {
        QNN_ERROR("Extensions Failure in beforeFreeDevice()");
        return false;
    }

    if (nullptr != m_qnnInterface.deviceFree) {
        const auto qnnStatus = m_qnnInterface.deviceFree(m_deviceHandle);
        if (QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE == qnnStatus) {
            QNN_WARN("Device feature unsupported");
        } else if (QNN_SUCCESS != qnnStatus) {
            QNN_ERROR("Failed to free device: %lu", (unsigned long)qnnStatus);
            return false;
        }
    }

    if (m_backendExtensions && m_backendExtensions->interface() &&
        !m_backendExtensions->interface()->afterFreeDevice()) {
        QNN_ERROR("Extensions Failure in afterfreeDevice()");
        return false;
    }
    return true;
}

// Create a Context in a backend.
// Creates a new context on the backend and appends its handle to m_contextVec.
//
// The configs passed to contextCreate are the ones derived from
// contextConfig.priority followed by any custom configs supplied by the
// backend extensions. All temporary config allocations are released on every
// path, including the failure paths. Returns false on any failure.
bool QnnApi::createContext(ContextConfigs contextConfig) {
    QnnContext_Config_t** customConfigs{nullptr};
    uint32_t              customConfigCount{0};
    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->beforeContextCreate(
                    &customConfigs, &customConfigCount
            )) {
            QNN_ERROR("Extensions Failure in beforeContextCreate()");
            return false;
        }
    }

    QnnContext_Config_t** contextConfigs     = nullptr;
    uint32_t              contextConfigCount = 0;
    if (true != getContextConfigs(&contextConfigs, contextConfigCount, contextConfig.priority)) {
        QNN_ERROR("Couldn't populate context configs");
        return false;
    }

    QnnContext_Config_t** allContextConfigs{nullptr};
    if (true != mergeAllContextConfigs(
                        &allContextConfigs,
                        customConfigs,
                        contextConfigs,
                        customConfigCount,
                        contextConfigCount
                )) {
        QNN_ERROR("Error merging custom and context configs");
        // The configs built above are owned here and must not leak.
        freeContextConfigs(contextConfigs, contextConfigCount);
        return false;
    }

    Qnn_ContextHandle_t contextHandle{nullptr};
    const auto          createStatus = m_qnnInterface.contextCreate(
            m_backendHandle,
            nullptr,
            (const QnnContext_Config_t**)allContextConfigs,
            &contextHandle
    );

    // Neither the merged array nor the generated configs are needed once the
    // call has returned, whether it succeeded or not.
    free(allContextConfigs);
    const bool configsFreed = freeContextConfigs(contextConfigs, contextConfigCount);

    if (QNN_CONTEXT_NO_ERROR != createStatus) {
        QNN_ERROR("Could not create context");
        return false;
    }

    m_contextVec.push_back(contextHandle);
    m_isContextCreated = true;

    if (!configsFreed) {
        QNN_ERROR("Couldn't free context configs");
        return false;
    }

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->afterContextCreate()) {
            QNN_ERROR("Extensions Failure in afterContextCreate()");
            return false;
        }
    }

    return true;
}

// Free context after done.
// Frees every context held in m_contextVec, calling the extensions'
// before/after hooks around the operation.
//
// Handles are removed from m_contextVec as soon as they have been freed, so
// a retry after a partial failure never frees the same context twice and no
// stale handle is left behind for later use. Returns false if a hook or a
// contextFree call fails; m_isContextCreated is cleared only when every
// context has been released.
bool QnnApi::freeContext() {

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->beforeContextFree()) {
            QNN_ERROR("Extensions Failure in beforeContextFree()");
            return false;
        }
    }

    for (auto contextIt = m_contextVec.begin(); contextIt != m_contextVec.end(); ++contextIt) {
        if (QNN_CONTEXT_NO_ERROR != m_qnnInterface.contextFree(*contextIt, nullptr)) {
            QNN_ERROR("Could not free context");
            // Forget the handles that were already released; keep the failing
            // one and the untouched remainder for a later attempt.
            m_contextVec.erase(m_contextVec.begin(), contextIt);
            return false;
        }
    }
    m_contextVec.clear();
    m_isContextCreated = false;

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->afterContextFree()) {
            QNN_ERROR("Extensions Failure in afterContextFree()");
            return false;
        }
    }

    return true;
}

// Calls composeGraph function in QNN's model.so.
// composeGraphs is supposed to populate graph related
// information in graphsInfo and graphsCount.
// m_debug is the option supplied to composeGraphs to
// say that all intermediate tensors including output tensors
// are expected to be read by the app.
bool QnnApi::composeGraphs(std::vector<GraphConfigs> graphConfigs) {
    GraphConfigInfo_t** customConfigs{nullptr};
    uint32_t            customConfigGraphsCount{0};
    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->beforeComposeGraphs(
                    &customConfigs, &customConfigGraphsCount
            )) {
            QNN_ERROR("Extensions Failure in beforeComposeGraphs()");
            return false;
        }
    }

    std::map<std::string, std::vector<QnnGraph_Config_t*>> graphConfigsPointers;
    if (!graphConfigs.empty()) {
        for (auto const& inputGraphConfig : graphConfigs) {
            // Only reset the memory for this graph, if it has not previously been populated with
            // something
            if (graphConfigsPointers.find(inputGraphConfig.graphName) ==
                graphConfigsPointers.end()) {
                graphConfigsPointers[inputGraphConfig.graphName] =
                        std::vector<QnnGraph_Config_t*>();
                graphConfigsPointers[inputGraphConfig.graphName].reserve(s_graphConfigsReserveCount
                );
            }
            if (inputGraphConfig.priorityPresent) {
                QnnGraph_Config_t* newGraphConfig =
                        (QnnGraph_Config_t*)malloc(sizeof(QnnGraph_Config_t));
                newGraphConfig->option   = QNN_GRAPH_CONFIG_OPTION_PRIORITY;
                newGraphConfig->priority = inputGraphConfig.priority;
                graphConfigsPointers[inputGraphConfig.graphName].push_back(newGraphConfig);
            }
        }
    }

    if (customConfigs != nullptr && customConfigGraphsCount > 0) {
        for (size_t gIdx = 0; gIdx < customConfigGraphsCount; gIdx++) {
            auto configPtr = customConfigs[gIdx]->graphConfigs;
            if (*configPtr &&
                (!customConfigs[gIdx]->graphName || strlen(customConfigs[gIdx]->graphName) == 0)) {
                QNN_ERROR("Graph configs specified without a graph name in the backend extensions."
                );
                return false;
            }
            if (customConfigs[gIdx]->graphName && strlen(customConfigs[gIdx]->graphName) > 0 &&
                *configPtr) {
                if (graphConfigsPointers.find(customConfigs[gIdx]->graphName) ==
                    graphConfigsPointers.end()) {
                    graphConfigsPointers[customConfigs[gIdx]->graphName] =
                            std::vector<QnnGraph_Config_t*>();
                    graphConfigsPointers[customConfigs[gIdx]->graphName].reserve(
                            s_graphConfigsReserveCount
                    );
                }
                while (*configPtr) {
                    graphConfigsPointers[customConfigs[gIdx]->graphName].push_back(
                            (QnnGraph_Config_t*)*configPtr
                    );
                    configPtr++;
                }
            }
        }
    }

    GraphConfigInfo_t** graphConfigsInfo{nullptr};
    graphConfigsInfo =
            (GraphConfigInfo_t**)calloc(graphConfigsPointers.size(), sizeof(GraphConfigInfo_t*));
    size_t graphIdx{0};
    for (auto const& graphConfig : graphConfigsPointers) {
        if (graphConfigsInfo && graphConfig.second.size() > 0) {
            graphConfigsInfo[graphIdx] = (GraphConfigInfo_t*)malloc(sizeof(GraphConfigInfo_t));
            graphConfigsInfo[graphIdx]->graphName    = (char*)graphConfig.first.c_str();
            graphConfigsInfo[graphIdx]->graphConfigs = (const QnnGraph_Config_t**)calloc(
                    graphConfig.second.size() + 1, sizeof(QnnGraph_Config_t*)
            );
            for (size_t cnt = 0; cnt < graphConfig.second.size(); cnt++) {
                graphConfigsInfo[graphIdx]->graphConfigs[cnt] = graphConfig.second[cnt];
            }
        }
        graphIdx++;
    }

    int status = m_composeGraphsFnHandle(
            m_backendHandle,
            m_qnnInterface,
            m_contextVec[0],
            (const GraphConfigInfo_t**)graphConfigsInfo,
            graphConfigsPointers.size(),
            &m_graphsInfo,
            &m_graphsCount,
            m_DebugModeRequested,
            nullptr,
            QnnLog_Level_t::QNN_LOG_LEVEL_VERBOSE
    );

    if (graphConfigsInfo) {
        for (size_t gIdx = 0; gIdx < graphConfigsPointers.size(); gIdx++) {
            if (graphConfigsInfo[gIdx]) {
                if (graphConfigsInfo[gIdx]->graphConfigs) {
                    free(graphConfigsInfo[gIdx]->graphConfigs);
                    graphConfigsInfo[gIdx]->graphConfigs = nullptr;
                    graphConfigsInfo[gIdx]->graphName    = nullptr;
                }
                free(graphConfigsInfo[gIdx]);
                graphConfigsInfo[gIdx] = nullptr;
            }
        }
        free(graphConfigsInfo);
    }

    for (auto const& graphConfig : graphConfigsPointers) {
        for (size_t cnt = 0; cnt < graphConfig.second.size(); cnt++) {
            if (graphConfig.second[cnt]) {
                free(graphConfig.second[cnt]);
            }
        }
        // graphConfig.second.clear();
    }

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->afterComposeGraphs()) {
            QNN_ERROR("Extensions Failure in afterComposeGraphs()");
            return false;
        }
    }

    if (0 != status) {
        QNN_ERROR("Failed in composeGraphs()");
        return false;
    }

    // For now, we only handle 1 graph for this framework.
    if (m_graphsCount != 1) {
        QNN_ERROR("Only one graph is supported by framework");
        return false;
    }

    return true;
}

bool QnnApi::composeGraphs(
        std::vector<GraphConfigs> graphConfigs,
        uint32_t*                 inputDim,
        uint32_t                  inputRank,
        uint32_t*                 outputDim,
        uint32_t                  outputRank,
        uint32_t*                 kvDim,
        uint32_t                  kvRank,
        Qnn_Param_t*              params,
        uint32_t                  numParams
) {
    ModelError status = m_genaiComposeGraphsFnHandle(
            m_backendHandle,
            m_qnnInterface,
            m_contextVec[0],
            nullptr,
            0,
            inputDim,
            inputRank,
            outputDim,
            outputRank,
            kvDim,
            kvRank,
            params,
            numParams,
            &m_graphsInfo,
            &m_graphsCount,
            m_DebugModeRequested,
            nullptr,
            QnnLog_Level_t::QNN_LOG_LEVEL_VERBOSE
    );

    graphCountPerContext = m_graphsCount;

    if (status == MODEL_NO_ERROR) {
        return true;
    }

    return false;
}

// Finalizes every graph listed in m_graphsInfo.
//
// Sequence:
//   1. If backend extensions are present, run their beforeGraphFinalize() hook.
//   2. Call graphFinalize() on each graph; after each successful finalize,
//      extract backend profiling info when a profile handle exists.
//   3. If backend extensions are present, run their afterGraphFinalize() hook.
//
// Returns false as soon as any hook or finalize call fails, true otherwise.
bool QnnApi::finalizeGraphs() {
    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->beforeGraphFinalize()) {
            QNN_ERROR("Extensions Failure in beforeGraphFinalize()");
            return false;
        }
    }

    // Guard against dereferencing a missing graph table below.
    if (m_graphsCount > 0 && nullptr == m_graphsInfo) {
        QNN_ERROR("Graphs info is nullptr, nothing to finalize");
        return false;
    }

    for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) {
        const auto errCode =
                m_qnnInterface.graphFinalize(m_graphsInfo[graphIdx]->graph, nullptr, nullptr);
        if (QNN_GRAPH_NO_ERROR != errCode) {
            QNN_ERROR(
                    "Failed to finalize graph index = %zu : err %d",
                    graphIdx,
                    static_cast<int>(errCode)
            );
            return false;
        }

        if (m_profileBackendHandle) {
            extractBackendProfilingInfo(m_profileBackendHandle);
        }
    }

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->afterGraphFinalize()) {
            QNN_ERROR("Extensions Failure in afterGraphFinalize()");
            return false;
        }
    }

    return true;
}

// Releases the graph metadata held by this object and resets the bookkeeping
// members (m_graphsInfo, m_graphsCount). Always returns true.
bool QnnApi::freeGraphs() {
    freeGraphsInfo(&m_graphsInfo, m_graphsCount);

    // free() is a no-op on a null pointer, so no explicit check is required.
    free(m_graphsInfo);

    m_graphsCount = 0;
    m_graphsInfo  = nullptr;
    return true;
}

// Loads one serialized context binary into memory and queries its binary info.
//
// Parameters:
//   use_mmap       - when true the file is memory-mapped via mmapBinaryFile()
//                    (not available on _WIN32 builds); otherwise it is read
//                    into a heap buffer via readBinaryFromFile().
//   buffer         - receives ownership of the loaded bytes.
//   binaryPath     - path of the context binary.
//   bufferSize     - size in bytes of the binary / buffer.
//   contextIdx     - used for diagnostics only.
//   graphSwitching - when true the shared_ptr deleter does NOT release the
//                    memory; instead {pointer, size} is appended to
//                    m_contextBinBuffersToBeCleared.
//   sysCtxHandle   - system context handle used for systemContextGetBinaryInfo().
//   binaryInfo     - receives the binary info pointer on success.
//
// Returns true on success, false on any load or query failure.
bool QnnApi::mapAndGetContextBinaryInfo(
        const bool                            use_mmap,
        std::shared_ptr<uint8_t>&             buffer,
        const std::string                     binaryPath,
        const uint64_t                        bufferSize,
        const size_t                          contextIdx,
        const bool                            graphSwitching,
        QnnSystemContext_Handle_t             sysCtxHandle,
        const QnnSystemContext_BinaryInfo_t** binaryInfo
) {
    if (use_mmap) {
#ifndef _WIN32
        void* mappedBuffer = nullptr;
        if (true != mmapBinaryFile(binaryPath, &mappedBuffer, bufferSize)) {
            QNN_ERROR("Failed to read binary data for context index = %zu", contextIdx);
            return false;
        }
        buffer = std::shared_ptr<uint8_t>(
                static_cast<uint8_t*>(mappedBuffer),
                [graphSwitching, bufferSize](uint8_t* ptr) {
                    if (!graphSwitching) {
                        munmap(ptr, bufferSize);
                    }
                }
        );
#else
        // Previously this path failed silently; report why the load was rejected.
        QNN_ERROR(
                "mmap of context binaries is not supported on this platform (context index = %zu)",
                contextIdx
        );
        return false;
#endif
    } else {
        buffer = std::shared_ptr<uint8_t>(new uint8_t[bufferSize], [graphSwitching](uint8_t* ptr) {
            if (!graphSwitching) {
                delete[] ptr;
            }
        });

        if (!buffer) {
            QNN_ERROR("Failed to allocate memory for context index = %zu", contextIdx);
            return false;
        }
        if (true != readBinaryFromFile(binaryPath, buffer.get(), bufferSize)) {
            QNN_ERROR("Failed to read binary data for context index = %zu", contextIdx);
            if (graphSwitching) {
                // In graph-switching mode the deleter is a no-op and the buffer has
                // not been registered in m_contextBinBuffersToBeCleared yet, so it
                // must be released here to avoid leaking it.
                uint8_t* rawBuffer = buffer.get();
                buffer.reset();
                delete[] rawBuffer;
            }
            return false;
        }
    }

    if (graphSwitching) {
        // Track the buffer so it can be released later by whoever drains this list.
        m_contextBinBuffersToBeCleared.push_back({buffer.get(), bufferSize});
    }

    Qnn_ContextBinarySize_t binaryInfoSize{0};
    if (QNN_SUCCESS != m_qnnSystemInterface.systemContextGetBinaryInfo(
                               sysCtxHandle,
                               static_cast<void*>(buffer.get()),
                               bufferSize,
                               binaryInfo,
                               &binaryInfoSize
                       )) {
        QNN_ERROR("Failed to get context binary info for context index = %zu", contextIdx);
        return false;
    }

    return true;
}

bool QnnApi::parseIOTensorsAndAccumulate(){

    for(int gIdx =0;gIdx<m_graphsCount;gIdx++){
        auto& graph_info = m_graphsInfo[gIdx];
        for (bool io : {true, false}) {
            uint32_t n_tensors = (io) ? graph_info->numInputTensors : graph_info->numOutputTensors;
            auto  tensor_wrappers = (io) ? graph_info->inputTensors : graph_info->outputTensors;
            for (size_t tensor_idx = 0; tensor_idx < n_tensors; tensor_idx++) {

                TensorWrapper& tensor      = tensor_wrappers[tensor_idx];
                std::string    tensor_name = QnnApi::getTensorName(tensor);

                std::vector<size_t> tensor_dims;
                if (!QnnApi::getTensorShape(tensor_dims, tensor)){
                    QNN_ERROR("Couldn't get tensor shape : %s", tensor_name.c_str());
                    return false;
                }

                std::vector<qualla::QnnUtils::QuantParam> quantParams;
                if (!QnnApi::getTensorQuantParams(&tensor_wrappers[tensor_idx], quantParams)) {
                    quantParams.emplace_back(0, 0);
                }

                m_graphtoIOMap[gIdx][tensor_name] =
                        qualla::QnnUtils::Tensor(tensor_wrappers + tensor_idx, tensor_dims, quantParams);
            }
        }
    }


    // Maps tensor_name to context bitVector, each bit representing a context the tensor exists in
    std::map<std::string, CtxBitVector> tensor_ctx_map;
    // Maps a ContextHandle to a one-hot encoded bitVector (e.g. 1, 2, 4, ...)
    std::map<int, CtxBitVector> ctx_to_hash;

    // Iterate over all tensors in all GraphVariants to figure out allocations
    for(int gIdx =0;gIdx<m_graphsCount;gIdx++){
        auto& graph_info = m_graphsInfo[gIdx];
        // Map the context handle to a hashed bitVector
        auto curContextHandle = m_graphIdxToContextIdx[gIdx];
        if (!ctx_to_hash.contains(curContextHandle)) {
            ctx_to_hash[curContextHandle] = 1 << ctx_to_hash.size();
        }
            for (auto& [tname, tspec] : m_graphtoIOMap[gIdx]) {
                size_t       size     = tspec.dims.getAlignedSize();
                CtxBitVector tcontext = ctx_to_hash[curContextHandle];

                // Check if it's LoRA enabled model
                if (!m_loraWeightEnabled && tname.find("lora") != std::string::npos) m_loraWeightEnabled = true;
                // Check if graph has lmhead weight input
                if (!m_lmHeadWeightInput && tname.compare("weight") == 0)
                    m_lmHeadWeightInput = true;

                // Allocate KV Tensors as in+out
                if (tname.starts_with("past_")) {
                    if (tname.ends_with("_in")) continue; // kv_in is processed along with kv_out

                    // For kv_out, add the size of kv_in as well
                    const std::string tname_in = tname.substr(0, tname.rfind('_')).append("_in");

                    if (m_graphtoIOMap[gIdx].count(tname_in)){
                        size += m_graphtoIOMap[gIdx][tname_in].dims.getAlignedSize();
                    }


                    // Allocate extra buffer for pointer shift
                    // 1024-n for keys (1024-n)*128 for values
                    // For aligned size, we might as well use 1024 and 128*1024
                    if (m_kvUpdateMethod == POINTER_SHIFT)
                        size += (tname.starts_with("past_key")) ? m_ctxSize
                                                                : m_ctxSize * m_kvDim;
                }

                if (tensor_ctx_map.contains(tname)) { // For duplicate tensor names, link them
                    CtxBitVector context_bitvec = tensor_ctx_map.at(tname);
                    size = std::max(m_contextAllocMap[context_bitvec][tname], size);
                    if ((context_bitvec & tcontext) == 0) // Set of contexts needs to be updated
                        m_contextAllocMap[context_bitvec].erase(tname);

                    tcontext |= context_bitvec;
                }

                m_contextAllocMap[tcontext][tname] = size;
                tensor_ctx_map[tname]          = tcontext;
            }

        // Cleanup is essential in case of very large number of splits
        for (auto it = m_contextAllocMap.cbegin(); it != m_contextAllocMap.cend();)
            it = (it->second.empty()) ? m_contextAllocMap.erase(it) : ++it;
    }

#if QNN_IO_TENSOR_DEBUG
for(auto& [bitvector, nameMap] : m_contextAllocMap){
        for(auto& [tname, size] : nameMap)
            QNN_DEBUG("Context: %d Tensor name: %s Tensor size: %zu",bitvector,tname.c_str(),size);
    }
#endif
    return true;
}

// Computes, for every IO tensor of graph `graphIdx`, the tuple
// (allocation index, offset inside that allocation, size) and hands the result
// to m_ioBufferMgr->mapFusedBufferOffset() together with the graph info and
// the context handle the graph belongs to.
//
// "past_*_in" tensors are not looked up on their own: each is placed at the
// start of the region recorded for its matching "past_*" output tensor, and
// the output tensor is placed right after it (plus the pointer-shift padding
// when m_kvUpdateMethod == POINTER_SHIFT).
//
// Returns false if a tensor has no entry in m_tensorAllocInfo or if the buffer
// manager fails to map the offsets; true otherwise.
bool QnnApi::registerTensorsWithBackend(uint32_t& graphIdx) {
    // tensor name -> (allocation index, offset, size)
    std::map<std::string, std::tuple<int, size_t, size_t>> graph_allocs;

    auto& graphIO = m_graphtoIOMap[graphIdx];
    for (auto& [tname, tspec] : graphIO) {
        // Process past_key/value_Inputs along with the outputs
        if (tname.starts_with("past_") && tname.ends_with("_in")) continue;

        // Report a missing entry through the bool result instead of letting at() throw.
        auto allocIt = m_tensorAllocInfo.find(tname);
        if (allocIt == m_tensorAllocInfo.end()) {
            QNN_ERROR("No allocation info found for tensor : %s", tname.c_str());
            return false;
        }
        auto& [alloc_idx, offset] = allocIt->second;

        size_t kv_offset = 0;
        size_t size      = tspec.dims.getAlignedSize();
        if (tname.starts_with("past_")) {
            auto in_name = tname.substr(0, tname.rfind("_")).append("_in");
            auto inIt    = graphIO.find(in_name);
            if (inIt != graphIO.end()) {
                // Reference the existing entry rather than copying the Tensor.
                kv_offset = inIt->second.dims.getAlignedSize();
                if (m_kvUpdateMethod == POINTER_SHIFT)
                    kv_offset += (tname.starts_with("past_key")) ? m_ctxSize
                                                                 : m_ctxSize * m_kvDim;
                graph_allocs[in_name] = {alloc_idx, offset, kv_offset};
            }
        }
        graph_allocs[tname] = {alloc_idx, offset + kv_offset, size};
    }

    auto& curContextHandle = m_contextVec[m_graphIdxToContextIdx[graphIdx]];
    if (!m_ioBufferMgr->mapFusedBufferOffset(
                m_graphsInfo[graphIdx], curContextHandle, graph_allocs
        )) {
        QNN_ERROR("Error mapping tensor to allocation buffers");
        return false;
    }

#if QNN_IO_TENSOR_DEBUG
    for (auto& [tname, data] : graph_allocs) {
        QNN_DEBUG("Tensor Name: %s Alloc Idx: %d Tensor Offset: %zu Tensor Size: %zu",tname.c_str(),get<0>(data),get<1>(data),get<2>(data));
    }
#endif

    return true;
}
bool QnnApi::createFromBinary(
        std::vector<std::string>        cachedBinariesPathVec,
        ContextConfigs                  contextConfig,
        int64_t                         spill_fill_buffer_size,
        uint64_t                        mmap_budget,
        bool                            graphSwitching,
        const std::vector<std::string>& execSelectGraphs,
        bool                            loadSelectGraphs
) {

    // Let backendExtensions populate configs
    QnnContext_Config_t** customConfigs{nullptr};
    uint32_t              customConfigCount{0};
    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->beforeCreateFromBinary(
                    &customConfigs, &customConfigCount
            )) {
            QNN_ERROR("Extensions Failure in beforeCreateFromBinary()");
            return false;
        }
    }

    if (nullptr == m_qnnSystemInterface.systemContextCreate ||
        nullptr == m_qnnSystemInterface.systemContextGetBinaryInfo ||
        nullptr == m_qnnSystemInterface.systemContextFree) {
        QNN_ERROR("QNN System function pointers are not populated.");
        return false;
    }

    graphCountPerContext = getGraphCountPerContext();

#ifdef SPILLFILL
    Qnn_ContextHandle_t          first_contextHandle{nullptr};
    QnnHtpContext_CustomConfig_t customConfigSF;
    customConfigSF.option = QNN_HTP_CONTEXT_CONFIG_OPTION_REGISTER_MULTI_CONTEXTS;
#endif

    // Reading Binary Buffer and storing for later use during Deserialization
    std::vector<std::shared_ptr<uint8_t>> bufferVec(cachedBinariesPathVec.size());
    // Stores sizes of all the Binary Buffers
    std::vector<uint64_t> allBuffSizes(cachedBinariesPathVec.size());
    // Stores graphs per Contexts
    std::vector<uint32_t> graphsPerContext(cachedBinariesPathVec.size());

    for (size_t contextIdx = 0; contextIdx < cachedBinariesPathVec.size(); contextIdx++) {

        auto _start = std::chrono::steady_clock::now(); // context Loading start
        uint64_t bufferSize{0};
        std::shared_ptr<uint8_t>& buffer{bufferVec[contextIdx]};
        uint32_t graphsCount;

        // read serialized binary into a byte buffer
        bufferSize = getFileSize(cachedBinariesPathVec[contextIdx]);
        allBuffSizes[contextIdx] = bufferSize;
        if (0 == bufferSize) {
            QNN_ERROR(
                    "Received path to an empty file for context index = %zu. Nothing to deserialize.",
                    contextIdx
            );
            return false;
        }

        // inspect binary info
        QnnSystemContext_Handle_t sysCtxHandle{nullptr};
        if (QNN_SUCCESS != m_qnnSystemInterface.systemContextCreate(&sysCtxHandle)) {
            QNN_ERROR("Could not create system handle for context index = %zu", contextIdx);
            return false;
        }

        const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr};
        if (!mapAndGetContextBinaryInfo(
                    m_mmapContextBins,
                    buffer,
                    cachedBinariesPathVec[contextIdx],
                    bufferSize,
                    contextIdx,
                    graphSwitching,
                    sysCtxHandle,
                    &binaryInfo
            )) {
            QNN_ERROR("Failed to map context Binary for contextIdx: %zu", contextIdx);
            return false;
        }

        GraphInfo_t** graphsInfo{nullptr};
        if (!copyMetadataToGraphsInfo(binaryInfo, graphsInfo, graphsCount)) {
            QNN_ERROR("Failed to copy metadata for graph index = %zu", contextIdx);
            freeGraphsInfo(&graphsInfo, graphsCount);
            if (contextIdx > 0) freeGraphsInfo(&m_graphsInfo, m_graphsCount);
            return false;
        }

        if (graphCountPerContext == -1) {
            graphCountPerContext = graphsCount;
            m_graphsInfo         = (GraphInfo_t**)calloc(
                    graphCountPerContext * cachedBinariesPathVec.size(), sizeof(GraphInfo_t*)
            );
        } else if (graphCountPerContext != graphsCount) {
            QNN_ERROR(
                    "Different len(graphs) found in different context files. Found %u vs %u",
                    graphsCount,
                    graphCountPerContext
            );
            freeGraphsInfo(&graphsInfo, graphsCount);
            if (contextIdx > 0) freeGraphsInfo(&m_graphsInfo, m_graphsCount);
            return false;
        }

        auto _stop = std::chrono::steady_clock::now(); // context Loading stop
        QNN_DEBUG(
                "Loading contexts[%lu] took: %lld us",
                contextIdx,
                std::chrono::duration_cast<std::chrono::microseconds>(_stop - _start).count()
        );
        graphsPerContext.push_back(graphsCount);
        for (int gIdx = 0; gIdx < graphsCount; gIdx++) {
            m_graphsInfo[m_graphsCount] = graphsInfo[gIdx];
            m_graphIdxToContextIdx[m_graphsCount] = contextIdx;
            m_graphsCount++;
        }
        m_qnnSystemInterface.systemContextFree(sysCtxHandle);
        sysCtxHandle = nullptr;
    }

    // Iterate over all the tensors across the graphs Info and build info about the IO space it is requiring.
    if(false == parseIOTensorsAndAccumulate()){
        QNN_ERROR("Error in parsing the IO tensor info for all context binaries");
        return false;
    }

    bool isIOBufferMgrInitialized = false;
    for (size_t contextIdx = 0; contextIdx < cachedBinariesPathVec.size(); contextIdx++) {

        // Create context configs for each context
        QnnContext_Config_t** contextConfigs     = nullptr;
        uint32_t              contextConfigCount = 0;
        if (true != getContextConfigs(
            &contextConfigs,
            contextConfigCount,
            contextConfig.priority,
            graphSwitching,
            execSelectGraphs,
            loadSelectGraphs
        )) {
          QNN_ERROR("Couldn't populate context configs");
          return false;
        }

        // Merge BE specific and agnostic configs
        QnnContext_Config_t** allContextConfigs{nullptr};
        if (true != mergeAllContextConfigs(
            &allContextConfigs,
            customConfigs,
            contextConfigs,
            customConfigCount,
            contextConfigCount
        )) {
          QNN_ERROR("Error merging custom and context configs");
          return false;
        }

        if (nullptr == m_qnnInterface.contextCreateFromBinary) {
            QNN_ERROR(
                    "contextCreateFromBinaryFnHandle is nullptr for context index = %zu", contextIdx
            );
            freeGraphsInfo(&m_graphsInfo, m_graphsCount);
            return false;
        }

        Qnn_ContextHandle_t contextHandle{nullptr};

        uint32_t customConfigCountSF = 0;

#ifdef SPILLFILL
        if (spill_fill_buffer_size > 0) {
            QnnHtpContext_GroupRegistration_t groupInfo{nullptr};
            if (contextIdx == 0) {
                groupInfo.firstGroupHandle = 0x0;
            } else {
                groupInfo.firstGroupHandle = first_contextHandle;
            }
            groupInfo.maxSpillFillBuffer     = spill_fill_buffer_size;
            customConfigSF.groupRegistration = groupInfo;

            QnnContext_Config_t** cfgs{nullptr};
            customConfigCountSF = 1;
            cfgs                = (QnnContext_Config_t**)malloc(
                    customConfigCountSF * sizeof(QnnContext_Config_t*)
            );
            cfgs[0]               = (QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t));
            cfgs[0]->option       = QNN_CONTEXT_CONFIG_OPTION_CUSTOM;
            cfgs[0]->customConfig = reinterpret_cast<QnnContext_CustomConfig_t>(&customConfigSF);
            if (true != mergeAllContextConfigs(
                                &allContextConfigs,
                                cfgs,
                                allContextConfigs,
                                customConfigCountSF,
                                contextConfigCount + customConfigCount
                        )) {
                QNN_ERROR("Error merging custom and context configs");
                return false;
            }
        }
#endif

        uint32_t customConfigCountIOMemEstimate = 0;
#if 1 // Adding IO_MEM_ESTIMATION
       QnnHtpContext_CustomConfig_t ioMemEstimation;
            ioMemEstimation.option = QNN_HTP_CONTEXT_CONFIG_OPTION_IO_MEM_ESTIMATION;
            ioMemEstimation.ioMemEstimation = true;

            QnnContext_Config_t** cfgs{nullptr};

            customConfigCountIOMemEstimate = 1;

            cfgs = (QnnContext_Config_t**)malloc(
                    customConfigCountIOMemEstimate * sizeof(QnnContext_Config_t*)
            );
            cfgs[0]         = (QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t));
            cfgs[0]->option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM;
            cfgs[0]->customConfig =
                    reinterpret_cast<QnnContext_CustomConfig_t>(&ioMemEstimation);
            if (true != mergeAllContextConfigs(
                    &allContextConfigs,
                    cfgs,
                    allContextConfigs,
                    customConfigCountIOMemEstimate,
                    contextConfigCount + customConfigCount + customConfigCountSF
            )) {
                QNN_ERROR("Error merging custom and context configs");
                return false;
            }
#endif

        if (mmap_budget > 0) {
            QnnHtpContext_CustomConfig_t customConfigReadBudget;
            customConfigReadBudget.option = QNN_HTP_CONTEXT_CONFIG_OPTION_FILE_READ_MEMORY_BUDGET;
            customConfigReadBudget.fileReadMemoryBudgetInMb = mmap_budget;

            QnnContext_Config_t** cfgs{nullptr};

            uint32_t customConfigCountReadBudget = 1;

            cfgs = (QnnContext_Config_t**)malloc(
                    customConfigCountReadBudget * sizeof(QnnContext_Config_t*)
            );
            cfgs[0]         = (QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t));
            cfgs[0]->option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM;
            cfgs[0]->customConfig =
                    reinterpret_cast<QnnContext_CustomConfig_t>(&customConfigReadBudget);
            if (true != mergeAllContextConfigs(
                                &allContextConfigs,
                                cfgs,
                                allContextConfigs,
                                customConfigCountReadBudget,
                                contextConfigCount + customConfigCount + customConfigCountSF + customConfigCountIOMemEstimate
                        )) {
                QNN_ERROR("Error merging custom and context configs");
                return false;
            }
        }


        auto start = std::chrono::steady_clock::now(); // context Deserialization starts

        auto errCode = m_qnnInterface.contextCreateFromBinary(
                m_backendHandle,
                m_deviceHandle,
                (const QnnContext_Config_t**)allContextConfigs,
                (const void*)bufferVec[contextIdx].get(),
                allBuffSizes[contextIdx],
                &contextHandle,
                nullptr // profile handle

        );

        auto stop = std::chrono::steady_clock::now(); // context Deserialization stops
        QNN_DEBUG(
                "Initializing context[%lu] with %u graphs took: %lld us",
                contextIdx,
                graphsPerContext[contextIdx],
                std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count()
        );

        if(!isIOBufferMgrInitialized){

            if (true != m_ioBufferMgr->initialize(contextHandle)) {
                QNN_ERROR("qnn-htp: failure to initialize IOTensor");
                return false;
            }

            isIOBufferMgrInitialized = true;

            // Calculate total allocation sizes and offset of each tensor within its allocated buffer
            if (m_ioBufferMgr->allocateBuffers(m_contextAllocMap, m_tensorAllocInfo) == false){
                QNN_ERROR("Failed to allocate the Memory across the context buffers.");
                return false;
            }

        }

        if (errCode != QNN_SUCCESS) {
            QNN_ERROR(
                    "Could not create context from binary for context index = %zu : err %d",
                    contextIdx,
                    (int)errCode
            );
             freeGraphsInfo(&m_graphsInfo, m_graphsCount);
            return false;
        }

        // Clearing buffer which is deseralized to reduce Memory footprint
        bufferVec[contextIdx].reset();

        if (m_profileBackendHandle) {
            extractBackendProfilingInfo(m_profileBackendHandle);
        }

        m_contextVec.push_back(contextHandle);
        for (int n_graph = 0; n_graph < graphCountPerContext; n_graph++) {

            uint32_t graphIdx = contextIdx*graphCountPerContext + n_graph ;

            GraphInfo_t* cur_graph = m_graphsInfo[graphIdx];
            m_contextMap[cur_graph]       = contextHandle;

            if (nullptr == m_qnnInterface.graphRetrieve) {
                QNN_ERROR("graphRetrieveFnHandle is nullptr.");
                freeGraphsInfo(&m_graphsInfo, m_graphsCount);
                return false;
            }

            if (!m_graphsInfo || QNN_SUCCESS != m_qnnInterface.graphRetrieve(
                                                    contextHandle,
                                                    cur_graph->graphName,
                                                    &(cur_graph->graph)
                                            )) {
                QNN_ERROR("Unable to retrieve graph handle for graph index = %d", graphIdx);
                freeGraphsInfo(&m_graphsInfo, m_graphsCount);
                return false;
            }

            // Register all the Tensors per graph.
            if(false == registerTensorsWithBackend(graphIdx)){
                QNN_ERROR("Unable to MemRegister IO Tensors for graph index = %d", graphIdx);
                freeGraphsInfo(&m_graphsInfo, m_graphsCount);
                return false;
            }

        }


#ifdef SPILLFILL
        if (spill_fill_buffer_size > 0 && contextIdx == 0) {
            first_contextHandle = contextHandle;
        }
#endif
        if (true != freeContextConfigs(contextConfigs, contextConfigCount)) {
          QNN_ERROR("Couldn't free context configs");
          return false;
        }
        if (allContextConfigs) {
          free(allContextConfigs);
        }
    }

    m_isContextCreated = true;

    QNN_DEBUG(
            "Initialized %u graphs from %lu contexts", m_graphsCount, cachedBinariesPathVec.size()
    );

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->afterCreateFromBinary()) {
            QNN_ERROR("Extensions Failure in afterCreateFromBinary()");
            return false;
        }
    }

    return true;
}

#if QUALLA_QNN_API_VERSION >= 21700
// Queries the backend for QNN_PROPERTY_CONTEXT_SUPPORT_CREATE_FROM_BINARY_LIST_ASYNC.
//
// propRet is set to true when the backend reports QNN_PROPERTY_SUPPORTED and
// to false for any other answer. The function itself returns false only when
// the propertyHasCapability entry point is missing (propRet is left untouched
// in that case).
bool QnnApi::checkCapabilityOfCreateAsync(bool& propRet) {
    if (m_qnnInterface.propertyHasCapability == nullptr) {
        QNN_ERROR("propertyHasCapability is nullptr.......");
        return false;
    }

    const auto capability = m_qnnInterface.propertyHasCapability(
            QNN_PROPERTY_CONTEXT_SUPPORT_CREATE_FROM_BINARY_LIST_ASYNC
    );
    propRet = (capability == QNN_PROPERTY_SUPPORTED);
    return true;
}

// Deletes the first `numParams` entries of `context_params_list` and clears
// each slot afterwards.
//
// Returns false without touching anything when the list pointer is null or
// when its first entry is null; returns true otherwise.
bool freeContextParams(QnnContext_Params_t** context_params_list, uint32_t numParams) {
    if (context_params_list == nullptr || *context_params_list == nullptr) {
        return false;
    }
    for (uint32_t i = 0; i < numParams; i++) {
        // `delete` on a null pointer is a no-op, so no per-entry check is needed.
        delete context_params_list[i];
        // Clear the slot so the caller is not left with a dangling pointer and a
        // repeated call cannot delete the same object twice.
        context_params_list[i] = nullptr;
    }
    return true;
}

// Notification callback for asynchronous context creation.
//
// `notifyParam` is interpreted as a std::pair<QnnApi*, uint32_t>* holding the
// target QnnApi instance and a context id. Depending on `completeType`:
//   - QNN_CONTEXT_NOTIFY_TYPE_CONTEXT_INIT -> updateContext(context, contextId)
//   - QNN_CONTEXT_NOTIFY_TYPE_GRAPH_INIT   -> updateQnnApiGraphsandContextsInfo(
//                                               graph_name, graph, contextId)
// Any other notification type is ignored. `status` is not inspected.
// The call is ignored when `notifyParam` or the instance pointer inside it is null.
void QnnApi::contextNotifyFn(
        Qnn_ContextHandle_t                          context,
        Qnn_GraphHandle_t                            graph,
        const char*                                  graph_name,
        QnnContext_createFromBinaryAsyncNotifyType_t completeType,
        void*                                        notifyParam,
        Qnn_ErrorHandle_t                            status
) {
    (void)status; // Not used by this callback.

    auto* notifyInfo = static_cast<std::pair<QnnApi*, uint32_t>*>(notifyParam);
    if (notifyInfo == nullptr || notifyInfo->first == nullptr) {
        // Nothing to dispatch to; avoid dereferencing a null pointer.
        return;
    }

    // Named `self` so the local does not shadow the class name.
    QnnApi*  self      = notifyInfo->first;
    uint32_t contextId = notifyInfo->second;

    if (completeType ==
        QnnContext_createFromBinaryAsyncNotifyType_t::QNN_CONTEXT_NOTIFY_TYPE_CONTEXT_INIT) {
        self->updateContext(context, contextId);
    } else if (completeType ==
               QnnContext_createFromBinaryAsyncNotifyType_t::QNN_CONTEXT_NOTIFY_TYPE_GRAPH_INIT) {
        self->updateQnnApiGraphsandContextsInfo(graph_name, graph, contextId);
    }
}

bool QnnApi::createFromBinaryListAsync(
        std::vector<std::string>        cachedBinariesPathVec,
        ContextConfigs                  contextConfig,
        int64_t                         spill_fill_buffer_size,
        uint64_t                        mmap_budget,
        bool                            graphSwitching,
        const std::vector<std::string>& execSelectGraphs,
        bool                            loadSelectGraphs
) {
    auto _start = std::chrono::steady_clock::now();

    // Let backendExtensions populate configs
    QnnContext_Config_t** customConfigs{nullptr};
    uint32_t              customConfigCount{0};
    std::map<std::string, std::tuple<QnnContext_Config_t **, uint32_t>> contextKeyToCustomConfigsMap;
    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->beforeCreateContextsFromBinaryList(
                    &contextKeyToCustomConfigsMap,&customConfigs, &customConfigCount
            )) {
            QNN_ERROR("Extensions Failure in beforeCreateContextsFromBinaryList()");
            return false;
        }
    }

    if (nullptr == m_qnnSystemInterface.systemContextCreate ||
        nullptr == m_qnnSystemInterface.systemContextGetBinaryInfo ||
        nullptr == m_qnnSystemInterface.systemContextFree) {
        QNN_ERROR("QNN System function pointers are not populated.");
        return false;
    }

    graphCountPerContext = getGraphCountPerContext();

    std::vector<QnnContext_Params_t*> context_params_list(cachedBinariesPathVec.size() +1, nullptr);
    std::vector<std::shared_ptr<uint8_t>> bufferVec(cachedBinariesPathVec.size());
    // for every context's graph info
    GraphInfo_t*** graphsInfo =
            (GraphInfo_t***)calloc(cachedBinariesPathVec.size(), sizeof(GraphInfo_t**));
    uint32_t graphsTotalNum = 0;
    std::vector<QnnContext_Config_t**> allContextConfigs{(unsigned int)cachedBinariesPathVec.size(), nullptr};
    std::vector<uint32_t> allContextConfigsSize{(unsigned int)cachedBinariesPathVec.size()};

    for (size_t contextIdx = 0; contextIdx < cachedBinariesPathVec.size(); contextIdx++) {
        auto _startPerContext = std::chrono::steady_clock::now();
        uint64_t bufferSize{0};
        std::shared_ptr<uint8_t>& buffer{bufferVec[contextIdx]};
        uint32_t graphsCount;

        // read serialized binary into a byte buffer
        bufferSize = getFileSize(cachedBinariesPathVec[contextIdx]);
        if (0 == bufferSize) {
            QNN_ERROR(
                    "Received path to an empty file for context index = %zu. Nothing to deserialize.",
                    contextIdx
            );
            return false;
        }

        // inspect binary info
        QnnSystemContext_Handle_t sysCtxHandle{nullptr};
        if (QNN_SUCCESS != m_qnnSystemInterface.systemContextCreate(&sysCtxHandle)) {
            QNN_ERROR("Could not create system handle for context index = %zu", contextIdx);
            return false;
        }
        const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr};
        if (!mapAndGetContextBinaryInfo(
                    m_mmapContextBins,
                    buffer,
                    cachedBinariesPathVec[contextIdx],
                    bufferSize,
                    contextIdx,
                    graphSwitching,
                    sysCtxHandle,
                    &binaryInfo
            )) {
            QNN_ERROR("Failed to map context Binary.");
            return false;
        }

        if (!copyMetadataToGraphsInfo(binaryInfo, graphsInfo[contextIdx], graphsCount)) {
            QNN_ERROR("Failed to copy metadata for graph index = %zu", contextIdx);
            freeGraphsInfo(&graphsInfo[contextIdx], graphsCount);
            freeGraphsInfo(&m_graphsInfo, graphsCount);
            return false;
        }

        if (graphCountPerContext == -1) {
            graphCountPerContext = graphsCount;
            graphsTotalNum       = graphCountPerContext * cachedBinariesPathVec.size();
            m_graphsInfo         = (GraphInfo_t**)calloc(graphsTotalNum, sizeof(GraphInfo_t*));

        } else if (graphCountPerContext != graphsCount) {
            QNN_ERROR(
                    "Different len(graphs) found in different context files. Found %u vs %u",
                    graphsCount,
                    graphCountPerContext
            );
            freeGraphsInfo(&graphsInfo[contextIdx], graphsCount);
            freeGraphsInfo(&m_graphsInfo, graphsTotalNum);
            return false;
        }
        for (int gIdx = 0; gIdx < graphsCount; gIdx++) {
            int graphIdxOfAll           = contextIdx * graphsCount + gIdx;
            m_graphsInfo[graphIdxOfAll] = graphsInfo[contextIdx][gIdx];
            m_graphNameToInfo[m_graphsInfo[graphIdxOfAll]->graphName] = m_graphsInfo[graphIdxOfAll];
        }
        m_qnnSystemInterface.systemContextFree(sysCtxHandle);
        sysCtxHandle = nullptr;

        uint32_t contextConfigCount = 0;
        if (true != getContextConfigs(
                            &allContextConfigs[contextIdx],
                            contextConfigCount,
                            contextConfig.priority,
                            graphSwitching,
                            execSelectGraphs,
                            loadSelectGraphs
                    )) {
            QNN_ERROR("Couldn't populate context configs");
            return false;
        }
        allContextConfigsSize[contextIdx] = contextConfigCount;

        // Merge BE specific and agnostic configs
        if (true != mergeAllContextConfigs(
                            &allContextConfigs[contextIdx],
                            customConfigs,
                            allContextConfigs[contextIdx],
                            customConfigCount,
                            contextConfigCount
                    )) {
            QNN_ERROR("Error merging custom and context configs");
            return false;
        }
        allContextConfigsSize[contextIdx] += customConfigCount;

        uint32_t customConfigCountSF = 0;
        if (mmap_budget > 0) {
            QnnHtpContext_CustomConfig_t customConfigReadBudget;
            customConfigReadBudget.option = QNN_HTP_CONTEXT_CONFIG_OPTION_FILE_READ_MEMORY_BUDGET;
            customConfigReadBudget.fileReadMemoryBudgetInMb = mmap_budget;

            QnnContext_Config_t** cfgs{nullptr};
            uint32_t customConfigCountReadBudget = 1;
            cfgs = (QnnContext_Config_t**)malloc(
                    customConfigCountReadBudget * sizeof(QnnContext_Config_t*)
            );
            cfgs[0]         = (QnnContext_Config_t*)malloc(sizeof(QnnContext_Config_t));
            cfgs[0]->option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM;
            cfgs[0]->customConfig =
                    reinterpret_cast<QnnContext_CustomConfig_t>(&customConfigReadBudget);
            if (true != mergeAllContextConfigs(
                                &allContextConfigs[contextIdx],
                                cfgs,
                                allContextConfigs[contextIdx],
                                customConfigCountReadBudget,
                                contextConfigCount + customConfigCount + customConfigCountSF
                        )) {
                QNN_ERROR("Error merging custom and context configs");
                return false;
            }
            allContextConfigsSize[contextIdx] += customConfigCountReadBudget;
        }

        if (m_profileBackendHandle) {
            extractBackendProfilingInfo(m_profileBackendHandle);
        }

        // passing class QnnApi pointer into callback funtion(notifyFn)
        std::pair<QnnApi*, uint32_t>* notifyParam =
                new std::pair<QnnApi*, uint32_t>(this, (size_t)contextIdx);
        QnnContext_Params_t* contextParam = new QnnContext_Params_t{
                .version = QNN_CONTEXT_PARAMS_VERSION_1,
                .v1 =
                        QnnContext_ParamsV1_t{
                                (const QnnContext_Config_t**)allContextConfigs[contextIdx],
                                (const void*)buffer.get(),
                                bufferSize,
                                nullptr,
                                QnnApi::contextNotifyFn,
                                (void*)notifyParam
                        }
        };

        context_params_list[contextIdx] = contextParam;

        auto _stop = std::chrono::steady_clock::now();
    QNN_DEBUG(
            "Loading contexts[%lu] took: %lld us",
            contextIdx,
            std::chrono::duration_cast<std::chrono::microseconds>(_stop - _startPerContext).count()
    );
    }

    if (nullptr == m_qnnInterface.contextCreateFromBinaryListAsync) {
        QNN_ERROR("contextCreateFromBinaryListAsyncFnHandle is nullptr");
        freeGraphsInfo(&m_graphsInfo, graphsTotalNum);
        freeContextParams(context_params_list.data(), cachedBinariesPathVec.size());
        return false;
    }

    auto start = std::chrono::steady_clock::now();
    auto errCode = m_qnnInterface.contextCreateFromBinaryListAsync(
                m_backendHandle,
                m_deviceHandle,
                const_cast<const QnnContext_Params_t**>(context_params_list.data()),
                (const QnnContext_Config_t**)customConfigs,
                nullptr
        );
    auto stop = std::chrono::steady_clock::now();

    QNN_DEBUG(
            "Initializing %lu context with %u graphs took: %lld us",
            cachedBinariesPathVec.size(),
            graphsTotalNum,
            std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count()
    );

    // Explicitly free the context binary buffers. This ensures that the lifecycle
    // of the buffers outlasts the API call where their raw pointers are referenced.
    for (auto contextBinaryBuffer : bufferVec) {
        QNN_DEBUG("Freeing context binary buffer @%p", contextBinaryBuffer.get());
        contextBinaryBuffer.reset();
    }

    if (errCode != QNN_SUCCESS) {
        QNN_ERROR(
                "Could not create context from binary List Async for context, err %d", (int)errCode
        );
        freeGraphsInfo(&m_graphsInfo, graphsTotalNum);
        freeContextParams(context_params_list.data(), cachedBinariesPathVec.size());
        return false;
    }

    // set graphInfo in m_graphsInfo
    for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) {
        int      contextIdxOfgraphsInfo  = graphIdx / graphCountPerContext;
        uint32_t contexIdxofCurrGraph = m_graphNameToContextIdx[m_graphsInfo[graphIdx]->graphName];
        m_graphsInfo[graphIdx] =
                graphsInfo[contextIdxOfgraphsInfo][graphIdx % graphCountPerContext];
        m_contextMap[m_graphsInfo[graphIdx]] = m_contextIdtoHandle[contexIdxofCurrGraph];
    }

    m_isContextCreated = true;

    if (true != freeContextParams(context_params_list.data(), cachedBinariesPathVec.size())) {
        QNN_ERROR("Couldn't free context params list");
        return false;
    }

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->afterCreateContextsFromBinaryList()) {
            QNN_ERROR("Extensions Failure in afterCreateContextsFromBinaryList()");
            return false;
        }
    }

    for (size_t contextIdx = 0; contextIdx < cachedBinariesPathVec.size(); contextIdx++) {
        if (true != freeContextConfigs(allContextConfigs[contextIdx], allContextConfigsSize[contextIdx])) {
            QNN_ERROR("Couldn't free context configs");
            return false;
        }
    }
    return true;
}
#endif

/**
 * Splits `str` at every occurrence of `delim`.
 *
 * Empty tokens are preserved: consecutive delimiters yield an empty string between them and a
 * leading or trailing delimiter yields an empty first or last token. An empty input produces a
 * single empty token.
 *
 * @param str    Text to split.
 * @param delim  Separator character; it is never part of a returned token.
 * @return The tokens in order of appearance (always at least one element).
 */
static std::vector<std::string> __split(std::string_view str, char delim) {
    std::vector<std::string> tokens;

    size_t tokenBegin = 0;
    while (true) {
        const size_t delimPos = str.find(delim, tokenBegin);
        if (delimPos == std::string_view::npos) {
            // Last (or only) token: everything that remains.
            tokens.emplace_back(str.substr(tokenBegin));
            break;
        }
        tokens.emplace_back(str.substr(tokenBegin, delimPos - tokenBegin));
        // Resume right after the delimiter so the next character is still examined.
        tokenBegin = delimPos + 1;
    }

    return tokens;
}

/**
 * Registers an op package with the backend.
 *
 * @param opPackagePath  Colon-separated descriptor "<path>:<interfaceProvider>[:<target>]".
 *                       When the third field is absent, nullptr is passed as the target.
 * @return true on success; false when the descriptor does not have two or three fields, when
 *         backendRegisterOpPackage is not populated, or when the backend reports an error.
 */
bool QnnApi::registerOpPackage(std::string opPackagePath) {
    const size_t pathIdx              = 0;
    const size_t interfaceProviderIdx = 1;
    const size_t targetIdx            = 2;

    const auto opPackage = __split(opPackagePath, ':');

    if (opPackage.size() != 2 && opPackage.size() != 3) {
        QNN_ERROR(
                "Malformed op package descriptor '%s', expected <path>:<interfaceProvider>[:<target>]",
                opPackagePath.c_str()
        );
        return false;
    }

    if (nullptr == m_qnnInterface.backendRegisterOpPackage) {
        QNN_ERROR("backendRegisterOpPackage is nullptr");
        return false;
    }

    // The target field is optional; the backend receives nullptr when it is omitted.
    const char* target = (opPackage.size() == 3) ? opPackage[targetIdx].c_str() : nullptr;

    const auto returnStatus = m_qnnInterface.backendRegisterOpPackage(
            m_backendHandle,
            opPackage[pathIdx].c_str(),
            opPackage[interfaceProviderIdx].c_str(),
            target
    );
    if (QNN_SUCCESS != returnStatus) {
        QNN_ERROR(
                "Could not register OpPackage backend due to error = %llu",
                (unsigned long long)returnStatus
        );
        return false;
    }

    return true;
}

// Performance Setting for HTP
/**
 * Fetches the device infrastructure from the backend, stores its perfInfra member in
 * m_perfInfra and creates a power config id (device 0, core 0) in m_powerConfigId.
 *
 * @return false when deviceGetInfrastructure is not populated, fails or yields a null
 *         infrastructure, or when createPowerConfigId fails; true otherwise.
 */
bool QnnApi::initializePerformance() {
    if (nullptr == m_qnnInterface.deviceGetInfrastructure) {
        QNN_ERROR("deviceGetInfrastructure is nullptr");
        return false;
    }

    QnnDevice_Infrastructure_t deviceInfra = nullptr;
    if (QNN_SUCCESS != m_qnnInterface.deviceGetInfrastructure(&deviceInfra)) {
        QNN_ERROR("Failure in deviceGetInfrastructure()");
        return false;
    }
    // Guard the dereference below: a successful call must still hand back an infrastructure.
    if (nullptr == deviceInfra) {
        QNN_ERROR("deviceGetInfrastructure() returned a null infrastructure");
        return false;
    }

    QnnHtpDevice_Infrastructure_t* htpInfra =
            static_cast<QnnHtpDevice_Infrastructure_t*>(deviceInfra);
    m_perfInfra       = &(htpInfra->perfInfra);
    uint32_t deviceId = 0;
    uint32_t coreId   = 0;
    if (QNN_SUCCESS != m_perfInfra->createPowerConfigId(deviceId, coreId, &m_powerConfigId)) {
        QNN_ERROR("Failure in createPowerConfigId()");
        return false;
    }

    return true;
}

/**
 * Destroys the power config id held in m_powerConfigId.
 *
 * @return true when m_perfInfra is null (nothing to do) or the id was destroyed; false when
 *         destroyPowerConfigId reports a failure.
 */
bool QnnApi::destroyPerformance() {
    if (m_perfInfra == nullptr) {
        return true;
    }

    if (m_perfInfra->destroyPowerConfigId(m_powerConfigId) != QNN_SUCCESS) {
        QNN_ERROR("Failure in destroyPowerConfigId()");
        return false;
    }

    return true;
}

/**
 * Applies a DCVS V3 power config in PERFORMANCE_MODE with bus and core voltage corners
 * (min / target / max) all set to DCVS_VOLTAGE_VCORNER_TURBO_PLUS.
 *
 * @return false when m_perfInfra is null or setPowerConfig fails, true otherwise.
 */
bool QnnApi::boostPerformance() {
    // m_perfInfra is only assigned by initializePerformance(); refuse to dereference null.
    if (nullptr == m_perfInfra) {
        QNN_ERROR("Performance infrastructure is not initialized");
        return false;
    }

    // Initialize the power config and select the voltage corner values for the performance setting.
    QnnHtpPerfInfrastructure_PowerConfig_t powerConfig;
    memset(&powerConfig, 0, sizeof(powerConfig));

    powerConfig.option                     = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3;
    powerConfig.dcvsV3Config.dcvsEnable    = 1;
    powerConfig.dcvsV3Config.setDcvsEnable = 1;
    powerConfig.dcvsV3Config.contextId     = m_powerConfigId;

    // refer QnnHtpPerfInfrastructure.h
    powerConfig.dcvsV3Config.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE;

    // Set Sleep-Disable latency parameter
    powerConfig.dcvsV3Config.setSleepDisable = 0;
    powerConfig.dcvsV3Config.sleepDisable    = 0;

    // Set Sleep latency parameter
    powerConfig.dcvsV3Config.setSleepLatency = 0;
    powerConfig.dcvsV3Config.sleepLatency    = 1000; // range 40-2000 micro sec

    // Set Bus Clock Parameters (refer QnnHtpPerfInfrastructure.h)
    powerConfig.dcvsV3Config.setBusParams           = 1;
    powerConfig.dcvsV3Config.busVoltageCornerMin    = DCVS_VOLTAGE_VCORNER_TURBO_PLUS;
    powerConfig.dcvsV3Config.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO_PLUS;
    powerConfig.dcvsV3Config.busVoltageCornerMax    = DCVS_VOLTAGE_VCORNER_TURBO_PLUS;

    // set Core Clock Parameters (refer QnnHtpPerfInfrastructure.h)
    powerConfig.dcvsV3Config.setCoreParams           = 1;
    powerConfig.dcvsV3Config.coreVoltageCornerMin    = DCVS_VOLTAGE_VCORNER_TURBO_PLUS;
    powerConfig.dcvsV3Config.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO_PLUS;
    powerConfig.dcvsV3Config.coreVoltageCornerMax    = DCVS_VOLTAGE_VCORNER_TURBO_PLUS;

    // Set power config with different performance parameters (nullptr-terminated list)
    const QnnHtpPerfInfrastructure_PowerConfig_t* powerConfigs[] = {&powerConfig, nullptr};
    if (QNN_SUCCESS != m_perfInfra->setPowerConfig(m_powerConfigId, powerConfigs)) {
        QNN_ERROR("Failure in setPowerConfig() from boostPerformance");
        return false;
    }

    return true;
}

/**
 * Applies a DCVS V3 power config in POWER_SAVER_MODE with bus and core voltage corners set to
 * min / target DCVS_VOLTAGE_VCORNER_NOM and max DCVS_VOLTAGE_VCORNER_TURBO.
 *
 * @return false when m_perfInfra is null or setPowerConfig fails, true otherwise.
 */
bool QnnApi::resetPerformance() {
    // m_perfInfra is only assigned by initializePerformance(); refuse to dereference null.
    if (nullptr == m_perfInfra) {
        QNN_ERROR("Performance infrastructure is not initialized");
        return false;
    }

    // Initialize the power config and select the voltage corner values for the performance setting.
    QnnHtpPerfInfrastructure_PowerConfig_t powerConfig;
    memset(&powerConfig, 0, sizeof(powerConfig));

    powerConfig.option                     = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3;
    powerConfig.dcvsV3Config.dcvsEnable    = 1;
    powerConfig.dcvsV3Config.setDcvsEnable = 1;
    powerConfig.dcvsV3Config.contextId     = m_powerConfigId;

    // refer QnnHtpPerfInfrastructure.h
    powerConfig.dcvsV3Config.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE;

    // Set Sleep-Disable latency parameter
    powerConfig.dcvsV3Config.setSleepDisable = 0;
    powerConfig.dcvsV3Config.sleepDisable    = 0;

    // Set Sleep latency parameter
    powerConfig.dcvsV3Config.setSleepLatency = 0;
    powerConfig.dcvsV3Config.sleepLatency    = 1000; // range 40-2000 micro sec

    // Set Bus Clock Parameters (refer QnnHtpPerfInfrastructure.h)
    powerConfig.dcvsV3Config.setBusParams           = 1;
    powerConfig.dcvsV3Config.busVoltageCornerMin    = DCVS_VOLTAGE_VCORNER_NOM;
    powerConfig.dcvsV3Config.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM;
    powerConfig.dcvsV3Config.busVoltageCornerMax    = DCVS_VOLTAGE_VCORNER_TURBO;

    // set Core Clock Parameters (refer QnnHtpPerfInfrastructure.h)
    powerConfig.dcvsV3Config.setCoreParams           = 1;
    powerConfig.dcvsV3Config.coreVoltageCornerMin    = DCVS_VOLTAGE_VCORNER_NOM;
    powerConfig.dcvsV3Config.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM;
    powerConfig.dcvsV3Config.coreVoltageCornerMax    = DCVS_VOLTAGE_VCORNER_TURBO;

    // Set power config with different performance parameters (nullptr-terminated list)
    const QnnHtpPerfInfrastructure_PowerConfig_t* powerConfigs[] = {&powerConfig, nullptr};
    if (QNN_SUCCESS != m_perfInfra->setPowerConfig(m_powerConfigId, powerConfigs)) {
        QNN_ERROR("Failure in setPowerConfig() from resetPerformance");
        return false;
    }

    return true;
}

/**
 * Brings the QNN runtime up from either a model library or a set of cached context binaries.
 *
 * Order of operations: load the QNN interface, load the system interface (cached binaries) or
 * the model (otherwise), initialize logging, backend extensions, the backend and the device,
 * then either create a context and compose/finalize graphs, or create the contexts from the
 * cached binaries. Finally m_graphNameToIndex is filled from m_graphsInfo.
 *
 * When built with QUALLA_QNN_API_VERSION >= 21700 and `asyncInit` is requested, the
 * list-async creation path is used if checkCapabilityOfCreateAsync() reports support;
 * otherwise createFromBinary() is used.
 *
 * @param modelPathOrCachedBinaryPathVec  One model path, or one or more context binary paths
 *                                        when `loadFromCachedBinary` is true.
 * @return true when every step succeeded, false on the first failure.
 */
bool QnnApi::initialize(
        std::string                     backendPath,
        std::vector<std::string>        modelPathOrCachedBinaryPathVec,
        BackendExtensionsConfigs        backendExtensionsConfig,
        PerfProfile                     parsedPerfProfile,
        ContextConfigs                  contextConfig,
        std::vector<GraphConfigs>       graphConfigs,
        bool                            loadFromCachedBinary,
        std::string                     systemLibraryPath,
        bool                            debugModeRequested,
        int64_t                         spill_fill_buffer_size,
        bool                            mmapContextBins,
        bool                            asyncInit,
        uint64_t                        mmap_budget,
        bool                            debug_qnn,
        bool                            graphSwitching,
        const std::vector<std::string>& execSelectGraphs,
        bool                            loadSelectGraphs
) {
    if (modelPathOrCachedBinaryPathVec.size() > 1 && false == loadFromCachedBinary) {
        QNN_ERROR("Currently only 1 model file is supported for this framework! \
            Although multiple context files are supported!");
        return false;
    }

    // The model path is read from element 0 below, so it has to exist.
    if (!loadFromCachedBinary && modelPathOrCachedBinaryPathVec.empty()) {
        QNN_ERROR("No model path was provided");
        return false;
    }

    m_mmapContextBins = mmapContextBins;

    // Setting up Debug mode
    m_DebugModeRequested = debugModeRequested;
    if (m_DebugModeRequested) {
        QNN_WARN("Warning: Debug mode set to true.");
    }

    // Initialize the QNN run time
    if (false == getQnnInterface(backendPath)) {
        QNN_ERROR("Qnn getQnnInterface FAILED!");
        return false;
    }

    if (loadFromCachedBinary) {
        if (false == getQnnSystemInterface(systemLibraryPath)) {
            QNN_ERROR("Qnn getQnnSystemInterface FAILED!");
            return false;
        }
    } else {
        if (false == loadModel(modelPathOrCachedBinaryPathVec[0])) {
            QNN_ERROR("Loading model FAILED!");
            return false;
        }
    }

    QnnLog_Level_t logLevel = QNN_LOG_LEVEL_WARN;
    if (false == initializeLogging(logLevel, debug_qnn)) {
        QNN_ERROR("Unable to Initialize logging in backend");
        return false;
    }

    // initialize backend extensions
#ifdef QUALLA_INTERNAL_QNN_SDK
    // Initialize backendExtensions only when both backend ext config and backend ext lib are provided
    if (!backendExtensionsConfig.configFilePath.empty() &&
        false == initializeBackendExtensions(
                         backendExtensionsConfig, parsedPerfProfile, debug_qnn
                 )) {
        QNN_WARN("Failure in initializing backend extensions.");
    }
#else
    if (false ==
        initializeBackendExtensions(backendExtensionsConfig, parsedPerfProfile, debug_qnn)) {
        QNN_ERROR("Failure in initializing backend extensions.");
        return false;
    }
#endif
    if (false == initializeBackend()) {
        QNN_ERROR("Qnn initializeBackend FAILED!");
        return false;
    }
    if (false == createDevice()) {
        QNN_ERROR("Device Creation failure");
        setDeviceStatus(false);
        return false;
    } else {
        setDeviceStatus(true);
    }
    if (!loadFromCachedBinary) {
        if (false == createContext(contextConfig)) {
            QNN_ERROR("Qnn createContext FAILED!");
            return false;
        }
        if (false == composeGraphs(graphConfigs)) {
            QNN_ERROR("composeGraphs FAILED!");
            return false;
        }
        if (false == finalizeGraphs()) {
            QNN_ERROR("finalizeGraphs FAILED!");
            return false;
        }
    } else {
#if QUALLA_QNN_API_VERSION >= 21700
        if (asyncInit) {
            // Honour the request only when the backend advertises the capability.
            bool asyncCapability = false;
            if (!checkCapabilityOfCreateAsync(asyncCapability)) {
                QNN_ERROR("Capabilty checked failed");
                return false;
            }
            asyncInit = asyncCapability;
        }
        if (asyncInit) {
            QNN_INFO("Using create From Binary List Async");
            if (false == createFromBinaryListAsync(
                                 modelPathOrCachedBinaryPathVec,
                                 contextConfig,
                                 spill_fill_buffer_size,
                                 mmap_budget,
                                 graphSwitching,
                                 execSelectGraphs,
                                 loadSelectGraphs
                         )) {
                QNN_ERROR("Create From Binary List Async FAILED!");
                return false;
            }
        } else
#endif
        {
            QNN_INFO("Using create From Binary");
            if (false == createFromBinary(
                                 modelPathOrCachedBinaryPathVec,
                                 contextConfig,
                                 spill_fill_buffer_size,
                                 mmap_budget,
                                 graphSwitching,
                                 execSelectGraphs,
                                 loadSelectGraphs
                         )) {
                QNN_ERROR("Create From Binary FAILED!");
                return false;
            }
        }
    }

    // if (false == initializePerformance()) {
    //     QNN_ERROR("initialize Performance FAILED!");
    //     return false;
    // }

    for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) {
        m_graphNameToIndex[m_graphsInfo[graphIdx]->graphName] = graphIdx;
    }

#if NSP_LOG_LEVEL > 1
    for (const auto& graphNameIndex : m_graphNameToIndex) {
        QNN_DEBUG(
                "Found Graph name %s corresponding to index %d",
                graphNameIndex.first.c_str(),
                graphNameIndex.second
        );
    }

    fprintf(stderr, "context_handles = [");
    for (auto ctx_handle : m_contextVec)
        fprintf(stderr, "%p, ", ctx_handle);
    fprintf(stderr, "]\n");
#endif
    return true;
}

/**
 * Brings the QNN runtime up from a model library with a custom op package.
 *
 * Order of operations: load the QNN interface, initialize logging (a failure is logged but
 * not fatal), initialize the backend, register the op package, load the model, create the
 * context, compose the graphs with the supplied dimensions/params and finalize them. No
 * device is created on this path; the device status is set to false.
 *
 * @param opPackage  Descriptor forwarded to registerOpPackage().
 * @return true when every mandatory step succeeded, false on the first failure.
 */
bool QnnApi::initialize(
        std::string               backendPath,
        std::string               modelPath,
        std::string               opPackage,
        ContextConfigs            contextConfig,
        std::vector<GraphConfigs> graphConfigs,
        uint32_t*                 inputDim,
        uint32_t                  inputRank,
        uint32_t*                 outputDim,
        uint32_t                  outputRank,
        uint32_t*                 kvDim,
        uint32_t                  kvRank,
        Qnn_Param_t*              params,
        uint32_t                  numParams,
        bool                      debugModeRequested
) {
    // Setting up Debug mode
    m_DebugModeRequested = debugModeRequested;
    if (m_DebugModeRequested) {
        QNN_WARN("Warning: Debug mode set to true.");
    }

    // Initialize the QNN run time
    if (false == getQnnInterface(backendPath)) {
        QNN_ERROR("Qnn getQnnInterface FAILED!");
        return false;
    }

    // A logging failure is reported but does not abort initialization.
    QnnLog_Level_t logLevel = QNN_LOG_LEVEL_WARN;
    if (false == initializeLogging(logLevel, false)) {
        QNN_ERROR("Unable to Initialize logging in backend");
    }

    if (false == initializeBackend()) {
        QNN_ERROR("Qnn initializeBackend FAILED!");
        return false;
    }

    //CPU does not support createDevice.
    setDeviceStatus(false);
    if (false == registerOpPackage(opPackage)) {
        // Name the step that actually failed (was a copy of the initializeBackend message).
        QNN_ERROR("Qnn registerOpPackage FAILED!");
        return false;
    }

// Change to 1 to enable QNN Basic profiling
#if 0
    if (false == initProfiling()) {
        QNN_ERROR("Profiling init failure");
        return false;
    }
#endif
    if (false == loadModel(modelPath)) {
        QNN_ERROR("Loading model FAILED!");
        return false;
    }
    if (false == createContext(contextConfig)) {
        QNN_ERROR("Qnn createContext FAILED!");
        return false;
    }
    if (false == composeGraphs(
                         graphConfigs, inputDim, inputRank, outputDim, outputRank, kvDim, kvRank, params, numParams
                 )) {
        QNN_ERROR("composeGraphs FAILED!");
        return false;
    }
    if (false == finalizeGraphs()) {
        QNN_ERROR("finalizeGraphs FAILED!");
        return false;
    }

    for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) {
        m_graphNameToIndex[m_graphsInfo[graphIdx]->graphName] = graphIdx;
    }
#if NSP_LOG_LEVEL > 1
    for (const auto& graphNameIndex : m_graphNameToIndex) {
        QNN_DEBUG(
                "Found Graph name %s corresponding to index %d",
                graphNameIndex.first.c_str(),
                graphNameIndex.second
        );
    }
#endif
    return true;
}

/**
 * Executes the graph registered under `graphName`.
 *
 * The backend-extension hooks beforeExecute()/afterExecute() are run around the call when an
 * extension interface is present, and custom graph configs returned by beforeExecute() are
 * applied first. Backend profiling data is extracted after the call whenever a profile handle
 * exists, including when execution failed.
 *
 * @param input      Input tensors; the count is taken from the graph's numInputTensors.
 * @param output     Output tensors; the count is taken from the graph's numOutputTensors.
 * @param graphName  Name that must be present in m_graphNameToIndex.
 * @param timeLogs   Timing accumulator handed to the profiling extraction (and updated
 *                   directly when NSP_LOG_LEVEL > 6).
 * @return true only when the graph is known, no exception escaped the backend call, the
 *         backend returned QNN_GRAPH_NO_ERROR and every extension hook succeeded.
 */
bool QnnApi::graphExecute(
        Qnn_Tensor_t*                                       input,
        Qnn_Tensor_t*                                       output,
        std::string                                         graphName,
        std::map<std::string, std::pair<double, uint16_t>>& timeLogs
) {
    // Resolve the graph once with find(): operator[] would silently insert index 0 for an
    // unknown name and run the wrong graph.
    const auto graphIt = m_graphNameToIndex.find(graphName);
    if (graphIt == m_graphNameToIndex.end()) {
        QNN_ERROR("Graph %s is not registered", graphName.c_str());
        return false;
    }
    auto* const graphInfo = m_graphsInfo[graphIt->second];
    if (nullptr == graphInfo) {
        QNN_ERROR("Graph info for %s is nullptr", graphName.c_str());
        return false;
    }

    QnnGraph_Config_t** customGraphConfigs{nullptr};
    uint32_t            configCount{0};
    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->beforeExecute(
                    graphName.c_str(), &customGraphConfigs, &configCount
            )) {
            QNN_ERROR("Extensions Failure in beforeExecute()");
            return false;
        }
        if (customGraphConfigs) {
            if (true !=
                setGraphConfigsBeforeExecute(graphInfo->graph, customGraphConfigs, configCount)) {
                QNN_ERROR("Failure in setGraphConfigsBeforeExecute()");
                return false;
            }
        }
    }

    // if (true != boostPerformance()) {
    //     QNN_ERROR("Couldn't boost the performance");
    //     return false;
    // }

    Qnn_ErrorHandle_t ret = QNN_GRAPH_NO_ERROR;
    // Stays false when an exception interrupts the call, so the failure is not reported as
    // success through the untouched initial value of `ret`.
    bool executed = false;
    try {
#if NSP_LOG_LEVEL > 1
        auto start = std::chrono::steady_clock::now();
#endif
        ret = m_qnnInterface.graphExecute(
                graphInfo->graph,
                input,
                graphInfo->numInputTensors,
                output,
                graphInfo->numOutputTensors,
                m_profileBackendHandle,
                nullptr
        );
        executed = true;
#if NSP_LOG_LEVEL > 1
        auto stop = std::chrono::steady_clock::now();
        QNN_DEBUG(
                "graphExecute[%s] took: %lld us",
                graphName.c_str(),
                std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count()
        );
#endif
#if NSP_LOG_LEVEL > 6
        timeLogs[graphName].first += static_cast<double>(
                std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count()
        );
        timeLogs[graphName].second++;
#endif

    } catch (const std::exception&) {
        QNN_ERROR("ERROR executing inference ret");
    } catch (...) {
        QNN_ERROR("ERROR executing inference ret");
    }

    if (m_profileBackendHandle) {
        extractBackendProfilingInfo(m_profileBackendHandle, timeLogs, graphName);
    }

    // if (true != resetPerformance()) {
    //     QNN_ERROR("Couldn't reset the performance");
    //     return false;
    // }

    if (!executed || ret != QNN_GRAPH_NO_ERROR) return false;

    if (nullptr != m_backendExtensions && m_backendExtensions->interface()) {
        if (!m_backendExtensions->interface()->afterExecute()) {
            QNN_ERROR("Extensions Failure in afterExecute()");
            return false;
        }
    }

    return true;
}

/**
 * Appends the quantization parameters of `tensor` to `quantParamsVec`.
 *
 * Only UFIXED_POINT_8, SFIXED_POINT_8 and UFIXED_POINT_16 tensors are handled. A scale-offset
 * encoding contributes one entry; an axis-scale-offset encoding contributes one entry per
 * scale/offset pair. Any other encoding is logged as unsupported.
 *
 * @return true when entries were taken from a supported encoding, false otherwise (the vector
 *         is not modified in that case).
 */
bool QnnApi::getTensorQuantParams(
        const Qnn_Tensor_t*      tensor,
        std::vector<QuantParam>& quantParamsVec
) {
    auto dataType    = QNN_TENSOR_GET_DATA_TYPE(tensor);
    auto quantParams = QNN_TENSOR_GET_QUANT_PARAMS(tensor);

    const bool isHandledFixedPoint = dataType == QNN_DATATYPE_UFIXED_POINT_8 ||
                                     dataType == QNN_DATATYPE_SFIXED_POINT_8 ||
                                     dataType == QNN_DATATYPE_UFIXED_POINT_16;
    if (!isHandledFixedPoint) {
        return false;
    }

    switch (quantParams.quantizationEncoding) {
        case Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_SCALE_OFFSET: {
            double  scale  = quantParams.scaleOffsetEncoding.scale;
            int32_t offset = quantParams.scaleOffsetEncoding.offset;
            quantParamsVec.emplace_back(scale, offset);
            return true;
        }
        case Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET: {
            auto perAxis = quantParams.axisScaleOffsetEncoding;
            for (uint32_t axisIdx = 0; axisIdx < perAxis.numScaleOffsets; ++axisIdx) {
                auto entry = perAxis.scaleOffset[axisIdx];
                quantParamsVec.emplace_back(entry.scale, entry.offset);
            }
            return true;
        }
        default:
            QNN_ERROR("quant encoding type not supported");
            return false;
    }
}

bool QnnApi::getTensorShape(std::vector<size_t>& tensorDims, const TensorWrapper& tensorWrapper) {
    const Qnn_Tensor_t& tensor = GET_TENSOR_WRAPPER_TENSOR(tensorWrapper);
    if (false ==
        fillDims(tensorDims, QNN_TENSOR_GET_DIMENSIONS(tensor), QNN_TENSOR_GET_RANK(tensor)))
        return false;

    tensorDims.push_back(getDataTypeSize(QNN_TENSOR_GET_DATA_TYPE(tensor)));
    return true;
}

bool QnnApi::getTensorNameAndShape(
        std::string&         tensorName,
        std::vector<size_t>& tensorDims,
        TensorWrapper&       tensorWrapper
) {
    Qnn_Tensor_t& tensor = GET_TENSOR_WRAPPER_TENSOR(tensorWrapper);
    tensorName           = std::string(GET_TENSOR_WRAPPER_NAME(tensorWrapper));
    if (false ==
        fillDims(tensorDims, QNN_TENSOR_GET_DIMENSIONS(tensor), QNN_TENSOR_GET_RANK(tensor)))
        return false;

    tensorDims.push_back(g_qnnDataTypeToSize[QNN_TENSOR_GET_DATA_TYPE(tensor)]);
    return true;
}

// Walks every top-level event recorded under `profileHandle` and, for each one,
// records the event itself and then all of its sub-events into `timeLogs`,
// using `graphName` as the key prefix.
//
// Returns false when the backend profile handle member is null or when the
// event list cannot be queried. Failures while extracting an individual event
// or its sub-events are not propagated.
bool QnnApi::extractBackendProfilingInfo(
        Qnn_ProfileHandle_t                                 profileHandle,
        std::map<std::string, std::pair<double, uint16_t>>& timeLogs,
        std::string                                         graphName
) {
    if (m_profileBackendHandle == nullptr) {
        QNN_ERROR("QNN HTP Profile handle is nullptr; may not be initialized.");
        return false;
    }

    const QnnProfile_EventId_t* events     = nullptr;
    uint32_t                    eventCount = 0;
    const auto                  queryStatus =
            m_qnnInterface.profileGetEvents(profileHandle, &events, &eventCount);
    if (queryStatus != QNN_PROFILE_NO_ERROR) {
        QNN_ERROR("Failure in QNN HTP profile get events.");
        return false;
    }

    QNN_DEBUG("ProfileEvents: [%p], numEvents: [%d]", events, eventCount);
    for (uint32_t i = 0; i < eventCount; ++i) {
        const QnnProfile_EventId_t eventId = events[i];
        extractProfilingEvent(eventId, timeLogs, graphName);
        extractProfilingSubEvents(eventId, timeLogs, graphName);
    }
    return true;
}

// Recursively records the sub-events of `profileEventId` into `timeLogs`.
//
// Each sub-event is recorded first and then descended into, so the event tree
// is visited depth-first. Returns false only when the sub-event list of
// `profileEventId` itself cannot be queried; results of the nested calls are
// not propagated.
bool QnnApi::extractProfilingSubEvents(
        QnnProfile_EventId_t                                profileEventId,
        std::map<std::string, std::pair<double, uint16_t>>& timeLogs,
        std::string                                         graphName
) {
    const QnnProfile_EventId_t* children   = nullptr;
    uint32_t                    childCount = 0;
    const auto                  queryStatus =
            m_qnnInterface.profileGetSubEvents(profileEventId, &children, &childCount);
    if (queryStatus != QNN_PROFILE_NO_ERROR) {
        QNN_ERROR("Failure in QNN HTP profile get sub events.");
        return false;
    }

    QNN_DEBUG("ProfileSubEvents: [%p], numSubEvents: [%d]", children, childCount);
    for (uint32_t i = 0; i < childCount; ++i) {
        const QnnProfile_EventId_t childId = children[i];
        extractProfilingEvent(childId, timeLogs, graphName);
        extractProfilingSubEvents(childId, timeLogs, graphName);
    }
    return true;
}

// Fetches the data of a single profiling event, logs it, and (only when
// NSP_LOG_LEVEL > 6) accumulates it into `timeLogs`.
//
// The accumulation key is "<graphName>_<event identifier>"; `.first` sums the
// event values and `.second` counts how many samples were added.
//
// Returns false when the event data cannot be retrieved.
bool QnnApi::extractProfilingEvent(
        QnnProfile_EventId_t                                profileEventId,
        std::map<std::string, std::pair<double, uint16_t>>& timeLogs,
        std::string                                         graphName
) {
    QnnProfile_EventData_t eventData;
    if (QNN_PROFILE_NO_ERROR != m_qnnInterface.profileGetEventData(profileEventId, &eventData)) {
        QNN_ERROR("Failure in profile get event type.");
        return false;
    }

    // A null identifier would be undefined behaviour both for "%s" and for the
    // std::string concatenation below, so substitute an empty string.
    const char* const identifier = (eventData.identifier != nullptr) ? eventData.identifier : "";
    static_cast<void>(identifier);  // keeps builds quiet when logging is compiled out

    // The event value is 64-bit: "%lu" is only correct on LP64 targets, so print
    // it through an explicit unsigned long long with "%llu" instead.
    QNN_DEBUG(
            "Event Info - Event Type: [%u], Event Value: [%llu], Event Identifier: [%s], Event Unit: [%u]",
            static_cast<unsigned int>(eventData.type),
            static_cast<unsigned long long>(eventData.value),
            identifier,
            static_cast<unsigned int>(eventData.unit)
    );
#if NSP_LOG_LEVEL > 6
    // Build the key and look it up once instead of twice.
    auto& accumulated = timeLogs[graphName + "_" + identifier];
    accumulated.first += static_cast<double>(eventData.value);
    accumulated.second++;
#endif

    return true;
}

// Log-only variant: walks every top-level event recorded under `profileHandle`
// and logs the event followed by all of its sub-events.
//
// Returns false when the backend profile handle member is null or when the
// event list cannot be queried. Failures while extracting an individual event
// or its sub-events are not propagated.
bool QnnApi::extractBackendProfilingInfo(Qnn_ProfileHandle_t profileHandle) {
    if (m_profileBackendHandle == nullptr) {
        QNN_ERROR("QNN HTP Profile handle is nullptr; may not be initialized.");
        return false;
    }

    const QnnProfile_EventId_t* events     = nullptr;
    uint32_t                    eventCount = 0;
    const auto                  queryStatus =
            m_qnnInterface.profileGetEvents(profileHandle, &events, &eventCount);
    if (queryStatus != QNN_PROFILE_NO_ERROR) {
        QNN_ERROR("Failure in QNN HTP profile get events.");
        return false;
    }

    QNN_DEBUG("ProfileEvents: [%p], numEvents: [%d]", events, eventCount);
    for (uint32_t i = 0; i < eventCount; ++i) {
        const QnnProfile_EventId_t eventId = events[i];
        extractProfilingEvent(eventId);
        extractProfilingSubEvents(eventId);
    }
    return true;
}

// Log-only variant: recursively logs the sub-events of `profileEventId`.
//
// Each sub-event is logged first and then descended into, so the event tree is
// visited depth-first. Returns false only when the sub-event list of
// `profileEventId` itself cannot be queried; results of the nested calls are
// not propagated.
bool QnnApi::extractProfilingSubEvents(QnnProfile_EventId_t profileEventId) {
    const QnnProfile_EventId_t* children   = nullptr;
    uint32_t                    childCount = 0;
    const auto                  queryStatus =
            m_qnnInterface.profileGetSubEvents(profileEventId, &children, &childCount);
    if (queryStatus != QNN_PROFILE_NO_ERROR) {
        QNN_ERROR("Failure in QNN HTP profile get sub events.");
        return false;
    }

    QNN_DEBUG("ProfileSubEvents: [%p], numSubEvents: [%d]", children, childCount);
    for (uint32_t i = 0; i < childCount; ++i) {
        const QnnProfile_EventId_t childId = children[i];
        extractProfilingEvent(childId);
        extractProfilingSubEvents(childId);
    }
    return true;
}

// Log-only variant: fetches the data of a single profiling event and logs its
// type, value, identifier and unit.
//
// Returns false when the event data cannot be retrieved.
bool QnnApi::extractProfilingEvent(QnnProfile_EventId_t profileEventId) {
    QnnProfile_EventData_t eventData;
    if (QNN_PROFILE_NO_ERROR != m_qnnInterface.profileGetEventData(profileEventId, &eventData)) {
        QNN_ERROR("Failure in profile get event type.");
        return false;
    }

    // The event value is 64-bit: "%lu" is only correct on LP64 targets, so print
    // it through an explicit unsigned long long with "%llu" instead. A null
    // identifier is replaced by an empty string because "%s" with a null
    // pointer is undefined behaviour.
    QNN_DEBUG(
            "Event Info - Event Type: [%u], Event Value: [%llu], Event Identifier: [%s], Event Unit: [%u]",
            static_cast<unsigned int>(eventData.type),
            static_cast<unsigned long long>(eventData.value),
            (eventData.identifier != nullptr) ? eventData.identifier : "",
            static_cast<unsigned int>(eventData.unit)
    );

    return true;
}

// Applies the updatable binary section stored in the file `binSectionPath` to
// the graph at index `graphId`.
//
// The context is chosen as m_contextVec[graphId / graphCountPerContext]. The
// whole file is read into a temporary buffer that lives only for the duration
// of this call.
//
// Returns false when the SDK is too old (compile-time check), the interface
// entry point is missing, `graphId` is out of range, the file is empty or
// unreadable, a context/graph handle is missing, or the backend rejects the
// section.
bool QnnApi::applyBinarySection(uint32_t graphId, std::string binSectionPath) {
#if QUALLA_QNN_API_VERSION < 21700
    QNN_ERROR("LoRA adaptors require QNN SDK >= 2.25.1. Please update your libraries");
    return false;
#else
    // Graph indices are assumed to start at 0.
    QNN_DEBUG("QnnApi::applyBinarySection %u ", graphId);
    if (nullptr == m_qnnInterface.contextApplyBinarySection) {
        QNN_ERROR("contextApplyBinarySection Interface not suported!!");
        return false;
    }
    if (graphId >= m_graphsCount) {
        QNN_ERROR(" Passed split %u  base Model graphcount %u ", graphId, m_graphsCount);
        return false;
    }

    // Resolve and validate the target handles before touching the file system.
    const auto graphCountPerContext = getGraphCountPerContext();
    if (graphCountPerContext <= 0) {
        QNN_ERROR(" graphCountPerContext is <=0 ");
        return false;
    }
    const size_t contextIdx = graphId / static_cast<uint32_t>(graphCountPerContext);
    if (contextIdx >= m_contextVec.size() || nullptr == m_graphsInfo ||
        nullptr == m_graphsInfo[graphId]) {
        QNN_ERROR(" contexthandle or graph handle is null for patch no = %u ", graphId);
        return false;
    }
    auto contextHandle = m_contextVec[contextIdx];
    auto graphHandle   = m_graphsInfo[graphId]->graph;
    if (contextHandle == nullptr || graphHandle == nullptr) {
        QNN_ERROR(" contexthandle or graph handle is null for patch no = %u ", graphId);
        return false;
    }

    const uint64_t bufferSize = getFileSize(binSectionPath);
    if (0 == bufferSize) {
        QNN_ERROR("Received path to an empty file for graph index = %u. Nothing to apply.", graphId);
        return false;
    }

    // unique_ptr<T[]> releases with delete[]; a plain shared_ptr<uint8_t> built
    // from new[] would release with delete, which is undefined behaviour.
    std::unique_ptr<uint8_t[]> buffer(new uint8_t[bufferSize]);
    if (true != readBinaryFromFile(binSectionPath, buffer.get(), bufferSize)) {
        QNN_ERROR("Failed to read binary data for context index = %u", graphId);
        return false;
    }

    QnnContext_Buffer_t qnnBuffer;
    qnnBuffer.version               = QNN_CONTEXT_BUFFER_VERSION_1;
    qnnBuffer.v1.memType            = QNN_CONTEXTMEMTYPE_RAW;
    qnnBuffer.v1.binaryBuf.dataSize = bufferSize;
    qnnBuffer.v1.binaryBuf.data     = static_cast<void*>(buffer.get());

    auto errorCode = m_qnnInterface.contextApplyBinarySection(
        contextHandle,
        graphHandle,
        QNN_CONTEXT_SECTION_UPDATABLE,
        &qnnBuffer,
        nullptr, // profile handle is null
        nullptr  // signal handle is null
    );
    if (errorCode != QNN_SUCCESS) {
        QNN_ERROR(
            "Could not Apply Patch for graph = %u errocode = %llu ",
            graphId,
            static_cast<unsigned long long>(errorCode)
        );
        return false;
    }
    return true;
#endif
}

// Applies an adapter binary to every graph it covers and then refreshes the
// I/O encodings of those graphs.
//
// `binIndex` selects the adapter slot: the binary is applied to graph indices
// [numAdapterGraph * binIndex, numAdapterGraph * (binIndex + 1)), where
// numAdapterGraph is the graph count reported for the binary. The loaded
// buffer is cached in m_adapterNameToBuffer under `binSectionPath` so that a
// later call with the same path reuses it instead of loading the file again.
// `useMmap` and `graphSwitch` are forwarded to mapAndGetContextBinaryInfo.
//
// Returns false on any validation, load, apply or encoding-update failure.
bool QnnApi::applyBinarySection(uint32_t binIndex, std::string binSectionPath,bool useMmap,bool graphSwitch) {
#if QUALLA_QNN_API_VERSION < 21700
    QNN_ERROR("LoRA adaptors require QNN SDK >= 2.25.1. Please update your libraries");
    return false;
#else
    // Adapter indices are assumed to start at 0.
    QNN_DEBUG("QnnApi::applyBinarySection %u ", binIndex);
    if (nullptr == m_qnnInterface.contextApplyBinarySection) {
        QNN_ERROR("contextApplyBinarySection Interface not suported!!");
        return false;
    }
    if (binIndex >= m_graphsCount) {
        QNN_ERROR(" Passed split %u  base Model graphcount %u ", binIndex, m_graphsCount);
        return false;
    }
    uint64_t                 bufferSize{0};
    std::shared_ptr<uint8_t> buffer{nullptr};
    bufferSize = getFileSize(binSectionPath);

    const auto graphCountPerContext = getGraphCountPerContext();
    if (graphCountPerContext <= 0) {
        QNN_ERROR(" graphCountPerContext is <=0 ");
        return false;
    }

    QnnSystemContext_Handle_t sysCtxHandle{nullptr};
    if (QNN_SUCCESS != m_qnnSystemInterface.systemContextCreate(&sysCtxHandle)) {
        QNN_ERROR("Could not create system handle for context index = %u", binIndex);
        return false;
    }

    const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr};
    bool                                 haveBinaryInfo = false;

    // find() instead of operator[] so that a failed load does not leave a null
    // placeholder entry behind in the cache.
    const auto cachedAdapter = m_adapterNameToBuffer.find(binSectionPath);
    if (cachedAdapter != m_adapterNameToBuffer.end() && cachedAdapter->second) {
        buffer = cachedAdapter->second;
        Qnn_ContextBinarySize_t binaryInfoSize{0};
        haveBinaryInfo =
                (QNN_SUCCESS == m_qnnSystemInterface.systemContextGetBinaryInfo(
                                        sysCtxHandle,
                                        static_cast<void*>(buffer.get()),
                                        bufferSize,
                                        &binaryInfo,
                                        &binaryInfoSize
                                ));
        if (!haveBinaryInfo) {
            QNN_ERROR("Failed to get context binary info for context index = %u", binIndex);
        }
    } else {
        haveBinaryInfo = mapAndGetContextBinaryInfo(
                useMmap,
                buffer,
                binSectionPath,
                bufferSize,
                binIndex,
                graphSwitch,
                sysCtxHandle,
                &binaryInfo
        );
        if (haveBinaryInfo) {
            m_adapterNameToBuffer[binSectionPath] = buffer;
        } else {
            QNN_ERROR("Failed to map context Binary for contextIdx: %u", binIndex);
        }
    }

    const uint32_t numAdapterGraph = haveBinaryInfo ? getNumGraphInBinary(binaryInfo) : 0;

    // binaryInfo is not read past this point, so the system context is released
    // here on every path (it was previously never freed by this function).
    // NOTE(review): assumes mapAndGetContextBinaryInfo does not free the handle itself - confirm.
    m_qnnSystemInterface.systemContextFree(sysCtxHandle);
    sysCtxHandle = nullptr;
    binaryInfo   = nullptr;

    if (!haveBinaryInfo) {
        return false;
    }
    if (numAdapterGraph == 0) {
        QNN_ERROR(" numAdapterGraph is <=0 ");
        return false;
    }

    // The same buffer is applied to every graph, so describe it once.
    QnnContext_Buffer_t qnnBuffer;
    qnnBuffer.version               = QNN_CONTEXT_BUFFER_VERSION_1;
    qnnBuffer.v1.memType            = QNN_CONTEXTMEMTYPE_RAW;
    qnnBuffer.v1.binaryBuf.dataSize = bufferSize;
    qnnBuffer.v1.binaryBuf.data     = static_cast<void*>(buffer.get());

    const uint32_t firstGraphId = numAdapterGraph * binIndex;
    for (uint32_t idx = 0; idx < numAdapterGraph; idx++) {
        const uint32_t graphId   = firstGraphId + idx;
        const size_t   contextId = graphId / static_cast<uint32_t>(graphCountPerContext);

        // Only binIndex was range-checked above; the derived indices must be
        // validated before they are used to index the graph/context tables.
        if (graphId >= m_graphsCount || contextId >= m_contextVec.size() ||
            nullptr == m_graphsInfo || nullptr == m_graphsInfo[graphId]) {
            QNN_ERROR(
                    " graph index %u is out of range for base Model graphcount %u ",
                    graphId,
                    m_graphsCount
            );
            return false;
        }

        auto contextHandle = m_contextVec[contextId];
        auto graphHandle   = m_graphsInfo[graphId]->graph;
        if (contextHandle == nullptr || graphHandle == nullptr) {
            QNN_ERROR(" contexthandle or graph handle is null for patch no = %u ", graphId);
            return false;
        }

        auto errorCode = m_qnnInterface.contextApplyBinarySection(
                contextHandle,
                graphHandle,
                QNN_CONTEXT_SECTION_UPDATABLE,
                &qnnBuffer,
                nullptr, // profile handle is null
                nullptr  // signal handle is null
        );
        if (errorCode != QNN_SUCCESS) {
            QNN_ERROR(
                    "Could not Apply Patch for graph = %u errocode = %llu ",
                    graphId,
                    static_cast<unsigned long long>(errorCode)
            );
            return false;
        }
    }

    if (!updateIOEncodings(buffer, bufferSize, firstGraphId)) {
        QNN_ERROR("qnn-htp: Adapter updateIOEncodings failed");
        return false;
    }
    return true;
#endif
}

// Re-reads the metadata of the context binary held in `buffer` (`bufferSize`
// bytes) and passes it to updateMetaDataToGraphsInfo together with
// m_graphsInfo and `graphIndex`.
//
// A temporary QNN system context is created for the query and is released on
// every path, including failures.
//
// Returns true only when both the binary-info query and the metadata update
// succeed.
bool QnnApi::updateIOEncodings(std::shared_ptr<uint8_t>& buffer,uint64_t  bufferSize,uint32_t graphIndex) {

    QNN_DEBUG("Applying adapter Encodings");
    QnnSystemContext_Handle_t sysCtxHandle{nullptr};
    if (QNN_SUCCESS != m_qnnSystemInterface.systemContextCreate(&sysCtxHandle)) {
        QNN_ERROR("Could not create system handle for context index = %u", graphIndex);
        return false;
    }

    const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr};
    Qnn_ContextBinarySize_t              binaryInfoSize{0};
    bool                                 updated = false;

    if (QNN_SUCCESS != m_qnnSystemInterface.systemContextGetBinaryInfo(
                               sysCtxHandle,
                               static_cast<void*>(buffer.get()),
                               bufferSize,
                               &binaryInfo,
                               &binaryInfoSize
                       )) {
        QNN_ERROR("Failed to get context binary info for context index = %u", graphIndex);
    } else if (!updateMetaDataToGraphsInfo(binaryInfo,  m_graphsInfo,graphIndex)) {
        QNN_ERROR("Failed to copy metadata for graph index = %u", graphIndex);
    } else {
        updated = true;
    }

    // Single release point: the early returns used to skip this and leak the handle.
    m_qnnSystemInterface.systemContextFree(sysCtxHandle);
    sysCtxHandle = nullptr;

    if (updated) {
        QNN_DEBUG(" updateIOEncodings success ");
    }
    return updated;
}

// This is a light weight function of existing ::createFromBinary, used for
// GPU execution to avoid conflicts with HTP use-case and for better readability.
bool QnnApi::createFromBinary(
    std::vector<std::string> cachedBinariesPathVec
) {
    auto _start = std::chrono::steady_clock::now();

    if (nullptr == m_qnnSystemInterface.systemContextCreate ||
        nullptr == m_qnnSystemInterface.systemContextGetBinaryInfo ||
        nullptr == m_qnnSystemInterface.systemContextFree) {
        QNN_ERROR("QNN System function pointers are not populated.");
        return false;
    }

    graphCountPerContext = getGraphCountPerContext();

    for (size_t contextIdx = 0; contextIdx < cachedBinariesPathVec.size(); contextIdx++) {
        uint64_t                 bufferSize{0};
        std::shared_ptr<uint8_t> buffer{nullptr};
        uint32_t                 graphsCount;

        // read serialized binary into a byte buffer
        bufferSize = getFileSize(cachedBinariesPathVec[contextIdx]);
        if (0 == bufferSize) {
            QNN_ERROR(
                    "Received path to an empty file for context index = %zu. Nothing to deserialize.",
                    contextIdx
            );
            return false;
        }

        buffer = std::shared_ptr<uint8_t>(
                new uint8_t[bufferSize], std::default_delete<uint8_t[]>()
        );
        if (!buffer) {
            QNN_ERROR("Failed to allocate memory for context index = %zu", contextIdx);
            return false;
        }
        if (true !=
            readBinaryFromFile(cachedBinariesPathVec[contextIdx], buffer.get(), bufferSize)) {
            QNN_ERROR("Failed to read binary data for context index = %zu", contextIdx);
            return false;
        }

        // inspect binary info
        QnnSystemContext_Handle_t sysCtxHandle{nullptr};
        if (QNN_SUCCESS != m_qnnSystemInterface.systemContextCreate(&sysCtxHandle)) {
            QNN_ERROR("Could not create system handle for context index = %zu", contextIdx);
            return false;
        }

        const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr};
        Qnn_ContextBinarySize_t              binaryInfoSize{0};

        if (QNN_SUCCESS != m_qnnSystemInterface.systemContextGetBinaryInfo(
                                   sysCtxHandle,
                                   static_cast<void*>(buffer.get()),
                                   bufferSize,
                                   &binaryInfo,
                                   &binaryInfoSize
                           )) {
            QNN_ERROR("Failed to get context binary info for context index = %zu", contextIdx);
            return false;
        }

        GraphInfo_t** graphsInfo;
        if (!copyMetadataToGraphsInfo(binaryInfo, graphsInfo, graphsCount)) {
            QNN_ERROR("Failed to copy metadata for graph index = %zu", contextIdx);
            freeGraphsInfo(&graphsInfo, graphsCount);
            if (contextIdx > 0) freeGraphsInfo(&m_graphsInfo, m_graphsCount);
            return false;
        }

        if (graphCountPerContext == -1) {
            graphCountPerContext = graphsCount;
            m_graphsInfo         = (GraphInfo_t**)calloc(
                    graphCountPerContext * cachedBinariesPathVec.size(), sizeof(GraphInfo_t*)
            );
        } else if (graphCountPerContext != graphsCount) {
            QNN_ERROR(
                    "Different len(graphs) found in different context files. Found %u vs %u",
                    graphsCount,
                    graphCountPerContext
            );
            freeGraphsInfo(&graphsInfo, graphsCount);
            if (contextIdx > 0) freeGraphsInfo(&m_graphsInfo, m_graphsCount);
            return false;
        }
        m_qnnSystemInterface.systemContextFree(sysCtxHandle);
        sysCtxHandle = nullptr;

        if (nullptr == m_qnnInterface.contextCreateFromBinary) {
            QNN_ERROR(
                    "contextCreateFromBinaryFnHandle is nullptr for context index = %zu", contextIdx
            );
            freeGraphsInfo(&graphsInfo, graphsCount);
            if (contextIdx > 0) freeGraphsInfo(&m_graphsInfo, m_graphsCount);
            return false;
        }
        Qnn_ContextHandle_t contextHandle{nullptr};
        auto _stop = std::chrono::steady_clock::now();
        QNN_DEBUG(
                "Loading contexts[%lu] took: %lld us",
                contextIdx,
                std::chrono::duration_cast<std::chrono::microseconds>(_stop - _start).count()
        );

        auto start = std::chrono::steady_clock::now();

        auto errCode = m_qnnInterface.contextCreateFromBinary(
                m_backendHandle,
                m_deviceHandle,
                nullptr,
                (const void*)buffer.get(),
                bufferSize,
                &contextHandle,
                nullptr // profile handle

        );

        if (errCode != QNN_SUCCESS) {
            QNN_ERROR(
                    "Could not create context from binary for context index = %zu : err %d",
                    contextIdx,
                    (int)errCode
            );
            freeGraphsInfo(&graphsInfo, graphsCount);
            if (contextIdx > 0) freeGraphsInfo(&m_graphsInfo, m_graphsCount);
            return false;
        }

        auto stop = std::chrono::steady_clock::now();
        QNN_DEBUG(
                "Initializing context[%lu] with %u graphs took: %lld us",
                contextIdx,
                graphsCount,
                std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count()
        );

        for (int n_graph = 0; n_graph < graphsCount; n_graph++) {
            // Allocate inputTensors and outputTensors
            GraphInfo_t* cur_graph = graphsInfo[n_graph];

            m_graphsInfo[m_graphsCount++] = cur_graph;
            m_contextMap[cur_graph]       = contextHandle;
        }
        m_contextVec.push_back(contextHandle);
    }

    m_isContextCreated = true;

    QNN_DEBUG(
            "Initialized %u graphs from %lu contexts", m_graphsCount, cachedBinariesPathVec.size()
    );

    if (nullptr == m_qnnInterface.graphRetrieve) {
        QNN_ERROR("graphRetrieveFnHandle is nullptr.");
        freeGraphsInfo(&m_graphsInfo, m_graphsCount);
        return false;
    }

    for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) {
        if (!m_graphsInfo || QNN_SUCCESS != m_qnnInterface.graphRetrieve(
                                                    m_contextVec[graphIdx / graphCountPerContext],
                                                    m_graphsInfo[graphIdx]->graphName,
                                                    &(m_graphsInfo[graphIdx]->graph)
                                            )) {
            QNN_ERROR("Unable to retrieve graph handle for graph index = %zu", graphIdx);
            freeGraphsInfo(&m_graphsInfo, m_graphsCount);
            return false;
        }
    }

    return true;
}

bool QnnApi::initialize(
        std::string               backendPath,
        std::vector<std::string>  modelPathOrCachedBinaryPath
) {
    if (modelPathOrCachedBinaryPath.size() != 1) {
        QNN_ERROR("Multiple Files not supported for now!!");
        return false;
    }

    if (false == getQnnInterface(backendPath)) {
        QNN_ERROR("Qnn getQnnInterface FAILED!");
        return false;
    }

    const std::string systemLibraryPath = "libQnnSystem.so";
    if (false == getQnnSystemInterface(systemLibraryPath)) {
        QNN_ERROR("Qnn getQnnSystemInterface FAILED!");
        return false;
    }

    QnnLog_Level_t logLevel = QNN_LOG_LEVEL_INFO;
    if (false == initializeLogging(logLevel, false)) {
        QNN_ERROR("Unable to Initialize logging in backend");
        return false;
    }

    // Initialize Backend
    if (false == initializeBackend()) {
        QNN_ERROR("Qnn initializeBackend FAILED!");
        return false;
    }

    if (false == createFromBinary(modelPathOrCachedBinaryPath)) {
        QNN_ERROR("Create From Binary FAILED!");
        return false;
    }

    for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) {
        m_graphNameToIndex[m_graphsInfo[graphIdx]->graphName] = graphIdx;
    }
    QNN_DEBUG("Model Initialized");

    return true;
}
