Inference

Open a command shell on the Linux host and run the following commands:

# make sure a test device is connected
adb devices

# push artifacts to device
# Quote ${QNN_SDK_ROOT} expansions so the commands work even if the SDK
# is installed under a path containing spaces.
adb push "${QNN_SDK_ROOT}/bin/aarch64-android/genie-t2t-run" /data/local/tmp/
adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libGenie.so" /data/local/tmp/
adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libQnnGenAiTransformer.so" /data/local/tmp/
adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libQnnGenAiTransformerCpuOpPkg.so" /data/local/tmp/
adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libQnnGenAiTransformerModel.so" /data/local/tmp/
adb push <path to tokenizer.json> /data/local/tmp/
adb push <path to llama2-7b-genaitransformer.json> /data/local/tmp/
adb push <path to model bin file, e.g. <path-to-downloaded-LLama-model-directory>/model.bin> /data/local/tmp/

# open adb shell
adb shell

# The pushed libraries and binary all live in /data/local/tmp/, so point
# both the dynamic linker and the command search path there.
export LD_LIBRARY_PATH=/data/local/tmp/
export PATH="$LD_LIBRARY_PATH:$PATH"

cd "$LD_LIBRARY_PATH" || exit 1
# NOTE: the trailing backslash is required — without it the '-p' prompt
# option on the next line would be parsed as a separate command.
./genie-t2t-run -c <path to llama2-7b-genaitransformer.json> \
                -p "Tell me about Qualcomm"