GitHub.com/ggerganov/llama.cpp update (#3696)

* llama.cpp: track the GitHub tags instead of a hardcoded version
* llama.cpp: if a model is specified, run with it; if not, download a default model first
* Use the entrypoint for the custom llama.cpp invocation
* `llama.cpp` itself is now just the raw executable; I think this is our new pattern
* To run chat, use the entrypoint: `pkgx +brewkit -- run llama.cpp` (see the sketch below)
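A minimal sketch of the two invocation styles described above (the chat command is from this change; the model path in the second command is a hypothetical example):

```sh
# chat mode: the entrypoint downloads a default GGUF model on first use, then starts an interactive chat
pkgx +brewkit -- run llama.cpp

# raw executable: pass your own arguments, including your own model (path is hypothetical)
pkgx llama.cpp --model path/to/your-model.gguf --interactive
```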

Co-authored-by: James Reynolds <magnsuviri@me.com>
Co-authored-by: Max Howell <mxcl@me.com>
James Reynolds 2023-10-26 05:24:04 -06:00 committed by GitHub
parent d3ba600cd5
commit 2b06942c62
5 changed files with 53 additions and 87 deletions

README.md

@@ -1,14 +1,12 @@
# getting started
```sh
$ llama.cpp
# ^^ default chat prompt with the OpenLLaMA model
$ pkgx +brewkit -- run llama.cpp
# ^^ default chat prompt with an appropriate hugging face model
```
If you want to run `llama.cpp` with your own args specify them and chat mode
will be skipped.
If you want to use a different model specify `--model`.
If you want to run `llama.cpp` with your own args `pkgx llama.cpp $ARGS` is
your friend.
# converting your own models
@@ -16,5 +14,7 @@ We provide a working `convert.py` from the llama.cpp project. To use it you
need to launch it via a tea pkgenv:
```sh
tea +github.com/ggerganov/llama.cpp convert.py path/to/your/model
pkgx +llama.cpp -- convert.py path/to/your/model
# ^^ the -- is necessary since `convert.py` is not listed in the llama.cpp
# provides list
```
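For reference, a hedged end-to-end sketch of the conversion workflow described above; the output filename is an assumption, since `convert.py` derives it from the model directory and output type:

```sh
# convert a local model directory to a llama.cpp-compatible file
# (the -- is needed because convert.py is not in the provides list)
pkgx +llama.cpp -- convert.py path/to/your/model

# then run chat against the converted model (output filename assumed; check convert.py's output)
pkgx llama.cpp --model path/to/your/model/ggml-model-f16.gguf --interactive
```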

entrypoint.sh (new file)

@@ -0,0 +1,32 @@
#!/usr/bin/env -S pkgx +gum +aria2c bash
set -eo pipefail
test -n "$VERBOSE" && set -x
MODEL_URL="https://huggingface.co/TheBloke/dolphin-2.1-mistral-7B-GGUF/resolve/main/dolphin-2.1-mistral-7b.Q4_0.gguf"
MODEL_FILENAME=$(basename "$MODEL_URL")
MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}"/llama.cpp
if [ ! -f "$MODEL_DIR/$MODEL_FILENAME" ]; then
gum format <<EoMD
# downloading $MODEL_FILENAME
the model will be placed in: \`$MODEL_DIR\`
> this may take a few minutes…
EoMD
echo #spacer
mkdir -p "$MODEL_DIR"
aria2c "$MODEL_URL" --dir="$MODEL_DIR"
gum format "# All done!"
echo #spacer
fi
D="$(cd "$(dirname "$0")" && pwd)"
exec "$D"/bin/llama.cpp \
--model "$MODEL_DIR/$MODEL_FILENAME" \
-n 256 \
--repeat_penalty 1.0 \
--color \
--interactive \
--reverse-prompt "User:" \
--file "$D"/share/prompts/chat-with-bob.txt
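The download above is one-shot: the GGUF file is cached under the XDG data directory, so later runs skip aria2c entirely. A small sketch of how that cache behaves (paths taken from the script above):

```sh
# where the entrypoint stores the downloaded model (XDG_DATA_HOME defaults to ~/.local/share)
MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/llama.cpp"
ls "$MODEL_DIR"    # after the first run: dolphin-2.1-mistral-7b.Q4_0.gguf

# deleting the cached file makes the entrypoint re-download it on the next run
rm "$MODEL_DIR/dolphin-2.1-mistral-7b.Q4_0.gguf"
```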

llama-fetch (deleted)

@@ -1,31 +0,0 @@
#!/bin/sh
set -e
test -n "$VERBOSE" && set -x
if test -f "$1"/VERSION && test $(cat "$1"/VERSION) = $2; then
exit
fi
mkdir -p "$1"
cd "$1"
tea gum format <<EoMD
# downloading OpenLLaMA 3Bv2
models will be placed: \`$PWD\`
> this may take a few minutes…
EoMD
tea wget \
--continue \
--output-document OpenLLaMA-3Bv2.ggml.f16.bin \
'https://huggingface.co/SlyEcho/open_llama_3b_v2_ggml/resolve/main/open-llama-3b-v2-f16.bin'
tea wget \
--continue \
'https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model'
echo $2 > VERSION
tea gum format "# All done!"
echo #spacer

llama.cpp (deleted)

@@ -1,37 +0,0 @@
#!/bin/sh
set -e
test -n "$VERBOSE" && set -x
D="$(cd "$(dirname "$0")"/.. && pwd)"
VERSION="$(basename "$D")"
MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/models/OpenLLaMA"
"$D"/libexec/llama-fetch "$MODEL_DIR" "$VERSION"
if [ "$1" = '--fetch' ]; then
exit
fi
if [ $# -eq 0 ]; then
exec "$D"/libexec/llama.cpp \
--model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin" \
--ctx-size 512 \
--batch-size 1024 \
--n-predict 256 \
--keep 48 \
--repeat_penalty 1.0 \
--color \
--interactive \
--reverse-prompt "User:" \
--file "$D"/share/prompts/chat-with-bob.txt
fi
# detect --model arg or not
for arg in "$@"; do
if [ "$arg" = "--model" -o "$arg" = "-m" ]; then
exec "$D"/libexec/llama.cpp "$@"
fi
done
exec "$D"/libexec/llama.cpp "$@" --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin"

package.yml

@@ -1,9 +1,10 @@
distributable:
url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/master-fff0e0e.tar.gz
url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/b{{version.raw}}.tar.gz
strip-components: 1
versions:
- 2023.07.20
github: ggerganov/llama.cpp/tags
strip: /^b/
display-name:
LLaMA.cpp
@@ -20,7 +21,7 @@ platforms:
dependencies:
python.org: ~3.11
tea.xyz: ^0 # the scripts use tea/cli
pkgx.sh: ^1
build:
dependencies:
@@ -28,28 +29,29 @@ build:
env:
VIRTUAL_ENV: ${{prefix}}/venv
script:
- make --jobs {{hw.concurrency}}
- |
make --jobs {{hw.concurrency}}
install -D main {{prefix}}/libexec/llama.cpp
install -D props/llama.cpp {{prefix}}/bin/llama.cpp
install -D main {{prefix}}/bin/llama.cpp
install -D props/entrypoint.sh {{prefix}}/entrypoint.sh
install -D ggml-metal.metal {{prefix}}/bin/ggml-metal.metal
- |
mkdir -p {{prefix}}/share
mv prompts {{prefix}}/share
mv props/llama-fetch {{prefix}}/libexec
- |
install -D convert.py $VIRTUAL_ENV/bin/convert.py
python-venv-stubber.sh convert.py
- python -m venv $VIRTUAL_ENV
- |
python -m venv $VIRTUAL_ENV
source $VIRTUAL_ENV/bin/activate
pip install -r requirements.txt
deactivate
test: |
{{prefix}}/libexec/llama.cpp --help
# testing more than this requires downloading the models 😬
test:
'{{prefix}}/bin/llama.cpp --help'
# ^^ testing more than this requires downloading the models 😬
entrypoint: llama.cpp
entrypoint: ./entrypoint.sh
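The `versions` stanza now scrapes upstream GitHub tags and strips their `b` prefix, which the `distributable` URL adds back. A minimal sketch of that round-trip, using a hypothetical tag:

```sh
# hypothetical upstream tag -> version -> source tarball URL
tag="b1429"                     # llama.cpp release tags look like bNNNN (example tag, assumed)
version="${tag#b}"              # what `strip: /^b/` yields: 1429
echo "https://github.com/ggerganov/llama.cpp/archive/refs/tags/b${version}.tar.gz"
```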