Inference¶
Open a command shell on Linux host and run:
# make sure a test device is connected
adb devices
# push artifacts to device
adb push ${QNN_SDK_ROOT}/bin/aarch64-android/genie-t2t-run /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libGenie.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnGenAiTransformer.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnGenAiTransformerCpuOpPkg.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnGenAiTransformerModel.so /data/local/tmp/
adb push <path to tokenizer.json> /data/local/tmp/
adb push <path to llama2-7b-genaitransformer.json> /data/local/tmp/
adb push <path to model bin file, e.g. <path-to-downloaded-LLama-model-directory>/model.bin> /data/local/tmp/
# open adb shell
adb shell
export LD_LIBRARY_PATH=/data/local/tmp/
export PATH=$LD_LIBRARY_PATH:$PATH
cd $LD_LIBRARY_PATH
./genie-t2t-run -c <path to llama2-7b-genaitransformer.json>
-p "Tell me about Qualcomm"