GitHub.com/ggerganov/llama.cpp update (#3696)
* llama.cpp: use the GitHub version instead of a hardcoded version
* llama.cpp: check if a model is specified; if yes, run it, if not, download one
* Use an entrypoint for custom llama.cpp invocation
* `llama.cpp` is just the raw executable. This I think is our new pattern.
* To run chat use the entrypoint: `pkgx +brewkit -- run llama.cpp`

Co-authored-by: James Reynolds <magnsuviri@me.com>
Co-authored-by: Max Howell <mxcl@me.com>
This commit is contained in: parent d3ba600cd5 · commit 2b06942c62
5 changed files with 53 additions and 87 deletions
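In practice the two invocation styles described in the commit message look roughly like this (a minimal sketch; it assumes `pkgx` and `brewkit` are installed, and the model path is a placeholder):

```sh
# chat mode via the packaged entrypoint (fetches a model on first run)
pkgx +brewkit -- run llama.cpp

# raw executable with your own arguments; the chat setup is skipped
pkgx llama.cpp --model path/to/your/model.gguf -n 256 --interactive
```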
docs — getting started (modified):

@@ -1,14 +1,12 @@
 # getting started
 
 ```sh
-$ llama.cpp
+$ pkgx +brewkit -- run llama.cpp
-# ^^ default chat prompt with the OpenLLaMA model
+# ^^ default chat prompt with an appropriate hugging face model
 ```
 
-If you want to run `llama.cpp` with your own args specify them and chat mode
-will be skipped.
+If you want to run `llama.cpp` with your own args `pkgx llama.cpp $ARGS` is
+your friend.
 
-If you want to use a different model specify `--model`.
-
 # converting your own models
 
@@ -16,5 +14,7 @@ We provide a working `convert.py` from the llama.cpp project. To use it you
 need to launch it via a tea pkgenv:
 
 ```sh
-tea +github.com/ggerganov/llama.cpp convert.py path/to/your/model
+pkgx +llama.cpp -- convert.py path/to/your/model
+# ^^ the -- is necessary since `convert.py` is not listed in the llama.cpp
+# provides list
 ```
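Putting the two docs snippets together, a hypothetical end-to-end flow could look like this (the checkpoint path and the `ggml-model-f16.gguf` output filename are assumptions, not taken from the commit):

```sh
# convert a local Hugging Face checkpoint (path is hypothetical)
pkgx +llama.cpp -- convert.py ~/models/my-model

# then chat with the converted file (output filename is an assumption)
pkgx llama.cpp --model ~/models/my-model/ggml-model-f16.gguf -n 256 --interactive
```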
projects/github.com/ggerganov/llama.cpp/entrypoint.sh (new executable file, 32 lines):

@@ -0,0 +1,32 @@
+#!/usr/bin/env -S pkgx +gum +aria2c bash
+
+set -eo pipefail
+test -n "$VERBOSE" && set -x
+
+MODEL_URL="https://huggingface.co/TheBloke/dolphin-2.1-mistral-7B-GGUF/resolve/main/dolphin-2.1-mistral-7b.Q4_0.gguf"
+MODEL_FILENAME=$(basename "$MODEL_URL")
+MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}"/llama.cpp
+
+if [ ! -f "$MODEL_DIR/$MODEL_FILENAME" ]; then
+  gum format <<EoMD
+# downloading $MODEL_FILENAME
+models will be placed: \`$MODEL_DIR\`
+> this may take a few minutes…
+EoMD
+  echo #spacer
+  mkdir -p "$MODEL_DIR"
+  aria2c "$MODEL_URL" --dir="$MODEL_DIR"
+  gum format "# All done!"
+  echo #spacer
+fi
+
+D="$(cd "$(dirname "$0")" && pwd)"
+
+exec "$D"/bin/llama.cpp \
+  --model "$MODEL_DIR/$MODEL_FILENAME" \
+  -n 256 \
+  --repeat_penalty 1.0 \
+  --color \
+  --interactive \
+  --reverse-prompt "User:" \
+  --file "$D"/share/prompts/chat-with-bob.txt
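Since the entrypoint only downloads when `$MODEL_DIR/$MODEL_FILENAME` is missing, a pre-placed copy of the model skips the fetch entirely; a sketch of that (assuming you already have the GGUF file locally):

```sh
# the same location the entrypoint checks
MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/llama.cpp"
mkdir -p "$MODEL_DIR"

# drop in the expected file; the aria2c download is then skipped
cp dolphin-2.1-mistral-7b.Q4_0.gguf "$MODEL_DIR/"
```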
the old `props/llama-fetch` download script (deleted, 31 lines):

@@ -1,31 +0,0 @@
-#!/bin/sh
-
-set -e
-test -n "$VERBOSE" && set -x
-
-if test -f "$1"/VERSION && test $(cat "$1"/VERSION) = $2; then
-  exit
-fi
-
-mkdir -p "$1"
-cd "$1"
-
-tea gum format <<EoMD
-# downloading OpenLLaMA 3Bv2
-models will be placed: \`$PWD\`
-> this may take a a few minutes…
-EoMD
-
-tea wget \
-  --continue \
-  --output-document OpenLLaMA-3Bv2.ggml.f16.bin \
-  'https://huggingface.co/SlyEcho/open_llama_3b_v2_ggml/resolve/main/open-llama-3b-v2-f16.bin'
-
-tea wget \
-  --continue \
-  'https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model'
-
-echo $2 > VERSION
-
-tea gum format "# All done!"
-echo #spacer
the old `props/llama.cpp` wrapper script (deleted, 37 lines):

@@ -1,37 +0,0 @@
-#!/bin/sh
-
-set -e
-test -n "$VERBOSE" && set -x
-
-D="$(cd "$(dirname "$0")"/.. && pwd)"
-VERSION="$(basename "$D")"
-MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/models/OpenLLaMA"
-
-"$D"/libexec/llama-fetch "$MODEL_DIR" "$VERSION"
-
-if [ "$1" = '--fetch' ]; then
-  exit
-fi
-
-if [ $# -eq 0 ]; then
-  exec "$D"/libexec/llama.cpp \
-    --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin" \
-    --ctx-size 512 \
-    --batch-size 1024 \
-    --n-predict 256 \
-    --keep 48 \
-    --repeat_penalty 1.0 \
-    --color \
-    --interactive \
-    --reverse-prompt "User:" \
-    --file "$D"/share/prompts/chat-with-bob.txt
-fi
-
-# detect --model arg or not
-for arg in "$@"; do
-  if [ "$arg" = "--model" -o "$arg" = "-m" ]; then
-    exec "$D"/libexec/llama.cpp "$@"
-  fi
-done
-
-exec "$D"/libexec/llama.cpp "$@" --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin"
the project's `package.yml` (modified):

@@ -1,9 +1,10 @@
 distributable:
-  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/master-fff0e0e.tar.gz
+  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/b{{version.raw}}.tar.gz
   strip-components: 1
 
 versions:
-  - 2023.07.20
+  github: ggerganov/llama.cpp/tags
+  strip: /^b/
 
 display-name:
   LLaMA.cpp
@@ -20,7 +21,7 @@ platforms:
 
 dependencies:
   python.org: ~3.11
-  tea.xyz: ^0 # the scripts use tea/cli
+  pkgx.sh: ^1
 
 build:
   dependencies:
@@ -28,28 +29,29 @@ build:
   env:
     VIRTUAL_ENV: ${{prefix}}/venv
   script:
+    - make --jobs {{hw.concurrency}}
+
     - |
-      make --jobs {{hw.concurrency}}
-      install -D main {{prefix}}/libexec/llama.cpp
-      install -D props/llama.cpp {{prefix}}/bin/llama.cpp
+      install -D main {{prefix}}/bin/llama.cpp
+      install -D props/entrypoint.sh {{prefix}}/entrypoint.sh
+      install -D ggml-metal.metal {{prefix}}/bin/ggml-metal.metal
 
     - |
       mkdir -p {{prefix}}/share
       mv prompts {{prefix}}/share
-      mv props/llama-fetch {{prefix}}/libexec
 
     - |
       install -D convert.py $VIRTUAL_ENV/bin/convert.py
       python-venv-stubber.sh convert.py
 
-    - python -m venv $VIRTUAL_ENV
     - |
+      python -m venv $VIRTUAL_ENV
       source $VIRTUAL_ENV/bin/activate
       pip install -r requirements.txt
       deactivate
 
-test: |
-  {{prefix}}/libexec/llama.cpp --help
-  # testing more than this requires downloading the models 😬
+test:
+  '{{prefix}}/bin/llama.cpp --help'
+  # ^^ testing more than this requires downloading the models 😬
 
-entrypoint: llama.cpp
+entrypoint: ./entrypoint.sh
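The new `versions` block lists upstream tags and strips the leading `b`, which the `distributable` URL then re-adds via `b{{version.raw}}`. A small sketch of the mapping (the tag number is hypothetical):

```sh
# tag "b1234" -> version "1234" after strip: /^b/
version="1234"

# the distributable template expands back to the tagged tarball
echo "https://github.com/ggerganov/llama.cpp/archive/refs/tags/b${version}.tar.gz"
```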