From 7c803208a2c1ba0a1cda5e6e8e6fc1a107718689 Mon Sep 17 00:00:00 2001
From: Max Howell
Date: Mon, 24 Jul 2023 16:43:32 -0400
Subject: [PATCH] update llama.cpp; use OpenLLaMA (#2655)

---
 .../antimatter15/alpaca.cpp/package.yml      |  3 +
 .../github.com/ggerganov/llama.cpp/README.md | 20 +++++++
 .../ggerganov/llama.cpp/llama-fetch          | 43 +++-----------
 .../github.com/ggerganov/llama.cpp/llama.cpp | 40 ++++++++-----
 .../ggerganov/llama.cpp/package.yml          | 59 ++++++++++++-------
 .../ggerganov/llama.cpp/pyproject.toml       |  8 ---
 6 files changed, 92 insertions(+), 81 deletions(-)
 create mode 100644 projects/github.com/ggerganov/llama.cpp/README.md
 delete mode 100644 projects/github.com/ggerganov/llama.cpp/pyproject.toml

diff --git a/projects/github.com/antimatter15/alpaca.cpp/package.yml b/projects/github.com/antimatter15/alpaca.cpp/package.yml
index 10e99247..664f096d 100644
--- a/projects/github.com/antimatter15/alpaca.cpp/package.yml
+++ b/projects/github.com/antimatter15/alpaca.cpp/package.yml
@@ -1,3 +1,6 @@
+warnings:
+  - deprecated
+
 distributable:
   url: https://github.com/antimatter15/alpaca.cpp/archive/refs/tags/81bd894.tar.gz
   strip-components: 1
diff --git a/projects/github.com/ggerganov/llama.cpp/README.md b/projects/github.com/ggerganov/llama.cpp/README.md
new file mode 100644
index 00000000..a63d42fd
--- /dev/null
+++ b/projects/github.com/ggerganov/llama.cpp/README.md
@@ -0,0 +1,20 @@
+# getting started
+
+```sh
+$ llama.cpp
+# ^^ default chat prompt with the OpenLLaMA model
+```
+
+If you want to run `llama.cpp` with your own args, specify them and chat mode
+will be skipped.
+
+If you want to use a different model, specify `--model`.
+
+# converting your own models
+
+We provide a working `convert.py` from the llama.cpp project. To use it, you
+need to launch it via a tea pkgenv:
+
+```sh
+tea +github.com/ggerganov/llama.cpp convert.py path/to/your/model
+```
diff --git a/projects/github.com/ggerganov/llama.cpp/llama-fetch b/projects/github.com/ggerganov/llama.cpp/llama-fetch
index 8c3cba99..6e3fe8e7 100755
--- a/projects/github.com/ggerganov/llama.cpp/llama-fetch
+++ b/projects/github.com/ggerganov/llama.cpp/llama-fetch
@@ -7,50 +7,23 @@
 if test -f "$1"/VERSION && test $(cat "$1"/VERSION) = $2; then
   exit
 fi
 
-if command -v git >/dev/null; then
-  GIT="tea git"
-else
-  GIT=git
-fi
-
 mkdir -p "$1"
 cd "$1"
 
-tea gum format "# preparing for model fetch"
-echo # spacer
-
-
-if test -d trackers; then
-  $GIT -C trackers fetch origin
-  $GIT -C trackers reset --hard origin/master
-else
-  $GIT clone "https://github.com/ngosang/trackerslist" trackers
-fi
-
-TRACKERS=$(grep -v '^#' "trackers/trackers_all.txt" | tr '\n' ',')
-
 tea gum format <<EoMD
 > this may take a few minutes…
 EoMD
 
-# --dir is .. coz the torrent is prefixed with a LLaMA directory
-tea aria2c \
-  --dir=.. \
-  --seed-time=0 \
-  --bt-tracker="$TRACKERS" \
-  --summary-interval=0 \
-  --check-integrity \
-  'magnet:?xt=urn:btih:FLWZABA5EDZXVC45YHEMX6W55O6S4HXK'
+tea wget \
+  --continue \
+  --output-document OpenLLaMA-3Bv2.ggml.f16.bin \
+  'https://huggingface.co/SlyEcho/open_llama_3b_v2_ggml'
 
-tea gum format "# finalizing model"
-echo #spacer
-
-tea +google.com/sentencepiece upgrade-model.py 7B tokenizer.model
-
-rm 7B/ggml-model-q4_0.bin
-mv 7B/ggml-model-q4_0.bin.tmp 7B/ggml-model-q4_0.bin
+tea wget \
+  --continue \
+  'https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model'
 
 echo $2 > VERSION
diff --git a/projects/github.com/ggerganov/llama.cpp/llama.cpp b/projects/github.com/ggerganov/llama.cpp/llama.cpp
index 57889c37..8e822b29 100755
--- a/projects/github.com/ggerganov/llama.cpp/llama.cpp
+++ b/projects/github.com/ggerganov/llama.cpp/llama.cpp
@@ -5,25 +5,33 @@ test -n "$VERBOSE" && set -x
 
 D="$(cd "$(dirname "$0")"/.. && pwd)"
 VERSION="$(basename "$D")"
-MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/models/LLaMA"
+MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/models/OpenLLaMA"
 
-export PATH="$D/tbin:$PATH"
+"$D"/libexec/llama-fetch "$MODEL_DIR" "$VERSION"
 
-llama-fetch "$MODEL_DIR" "$VERSION"
+if [ "$1" = '--fetch' ]; then
+  exit
+fi
 
-if test $1 = chat; then
-  exec "$D"/tbin/llama.cpp \
-    --model "$MODEL_DIR"/7B/ggml-model-q4_0.bin \
-    -n 256 \
+if [ $# -eq 0 ]; then
+  exec "$D"/libexec/llama.cpp \
+    --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin" \
+    --ctx-size 512 \
+    --batch-size 1024 \
+    --n-predict 256 \
+    --keep 48 \
     --repeat_penalty 1.0 \
     --color \
-    -i \
-    -r \
-    "User:" \
-    -f "$D"/share/prompts/chat-with-bob.txt
-else
-  exec "$D"/tbin/llama.cpp \
-    --color \
-    --model "$MODEL_DIR"/7B/ggml-model-q4_0.bin \
-    "$@"
+    --interactive \
+    --reverse-prompt "User:" \
+    --file "$D"/share/prompts/chat-with-bob.txt
 fi
+
+# if the user passed their own --model/-m, hand the args through untouched
+for arg in "$@"; do
+  if [ "$arg" = "--model" -o "$arg" = "-m" ]; then
+    exec "$D"/libexec/llama.cpp "$@"
+  fi
+done
+
+exec "$D"/libexec/llama.cpp "$@" --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin"
diff --git a/projects/github.com/ggerganov/llama.cpp/package.yml b/projects/github.com/ggerganov/llama.cpp/package.yml
index ee4c8017..4a058e2a 100644
--- a/projects/github.com/ggerganov/llama.cpp/package.yml
+++ b/projects/github.com/ggerganov/llama.cpp/package.yml
@@ -1,39 +1,54 @@
 distributable:
-  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/master-8b67998.tar.gz
+  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/master-fff0e0e.tar.gz
   strip-components: 1
 
 versions:
-  - 2023.04.11
+  - 2023.07.20
 
 provides:
   - bin/llama.cpp
+  # NOTE! we do not “provide” convert.py ∵ it’s too generic
+  # do `tea +github.com/ggerganov/llama.cpp convert.py`
+
+platforms:
+  - linux
+  - darwin/aarch64
+  # Illegal instruction: 4 on darwin/x86-64
+
+dependencies:
+  python.org: ^3.11
+  tea.xyz: ^0  # the scripts use tea/cli
 
 build:
   dependencies:
     tea.xyz/gx/cc: c99
     tea.xyz/gx/make: '*'
-    freedesktop.org/pkg-config: ~0.29
-    gnu.org/wget: '*'
-  script: |
-    mkdir -p {{prefix}}/bin {{prefix}}/tbin {{prefix}}/share
+    gnu.org/coreutils: '*'
+  env:
+    VIRTUAL_ENV: ${{prefix}}/venv
+  script:
+    - |
+      make --jobs {{hw.concurrency}}
+      install -D main {{prefix}}/libexec/llama.cpp
+      install -D props/llama.cpp {{prefix}}/bin/llama.cpp
 
-    make
-    mv main {{prefix}}/tbin/llama.cpp
+    - |
+      mkdir -p {{prefix}}/share
+      mv prompts {{prefix}}/share
+      mv props/llama-fetch {{prefix}}/libexec
 
-    mv props/llama.cpp {{prefix}}/bin
+    - |
+      install -D convert.py $VIRTUAL_ENV/bin/convert.py
+      python-venv-stubber.sh convert.py
 
-    mv props/llama-fetch {{prefix}}/tbin
-    mv $SRCROOT/prompts {{prefix}}/share
-
-    mv $SRCROOT/*.py {{prefix}}/tbin
-    mv quantize {{prefix}}/tbin/quantize
-
-    wget \
-      --no-check-certificate \
-      'https://gist.githubusercontent.com/eiz/828bddec6162a023114ce19146cb2b82/raw/6b1d2b192815e6d61386a9a8853f2c3293b3f568/gistfile1.txt' \
-      -O {{prefix}}/tbin/upgrade-model.py
-    chmod +x {{prefix}}/tbin/upgrade-model.py
+    - python -m venv $VIRTUAL_ENV
 
+    - |
+      source $VIRTUAL_ENV/bin/activate
+      pip install -r requirements.txt
+      deactivate
 
 test: |
-  {{prefix}}/tbin/llama.cpp --help
-# testing more than this requires downloading the models 😬
+  {{prefix}}/libexec/llama.cpp --help
+  # testing more than this requires downloading the models 😬
+
+entrypoint: llama.cpp
diff --git a/projects/github.com/ggerganov/llama.cpp/pyproject.toml b/projects/github.com/ggerganov/llama.cpp/pyproject.toml
deleted file mode 100644
index 664d0be0..00000000
--- a/projects/github.com/ggerganov/llama.cpp/pyproject.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-[project]
-requires-python = ">=3.7"
-name = "llama.cpp"
-version = '0.0.0'
-
-dependencies = [
-  "sentencepiece>=0.1.96"
-]
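
Reviewer note: the argument dispatch implemented by the new `props/llama.cpp` wrapper, as a usage sketch. Every flag shown is one the patch itself introduces or forwards (`--fetch`, `--model`/`-m`, llama.cpp's `--n-predict` and `--file`); `my-prompt.txt` is an illustrative filename.

```sh
llama.cpp --fetch   # download model + tokenizer into $XDG_DATA_HOME/models/OpenLLaMA, then exit
llama.cpp           # no args: interactive chat using share/prompts/chat-with-bob.txt
llama.cpp --n-predict 64 --file my-prompt.txt
                    # ^^ custom args: chat mode is skipped, default model appended
llama.cpp --model ~/models/other.ggml.f16.bin
                    # ^^ -m/--model present, so args pass through unmodified
```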
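`python-venv-stubber.sh` comes from tea's build tooling and is not defined in this patch; presumably it writes a small shim so `convert.py` runs against the package's private venv (created at `{{prefix}}/venv` above) without polluting the user's Python. A minimal sketch of such a stub, assuming that behavior (the shim path and contents are hypothetical):

```sh
#!/bin/sh
# hypothetical shim for convert.py: resolve the package prefix, then
# exec the real script with the venv's interpreter so its pip deps
# (requirements.txt) stay isolated from the user's environment
PREFIX="$(cd "$(dirname "$0")"/.. && pwd)"
exec "$PREFIX"/venv/bin/python "$PREFIX"/venv/bin/convert.py "$@"
```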