Mirror of https://github.com/ivabus/pantry (synced 2024-11-10 02:25:18 +03:00)
GitHub.com/ggerganov/llama.cpp update (#3696)
* llama.cpp, github version instead of hardcoded version
* llama.cpp, check if model is specified, if yes, run it, if not, then download model
* Use entrypoint for custom llama.cpp invocation
* `llama.cpp` is just raw executable. This I think is our new pattern.
* To run chat use the entrypoint: `pkgx +brewkit -- run llama.cpp`

Co-authored-by: James Reynolds <magnsuviri@me.com>
Co-authored-by: Max Howell <mxcl@me.com>
This commit is contained in:
  parent d3ba600cd5
  commit 2b06942c62
@@ -1,14 +1,12 @@
 # getting started

 ```sh
-$ llama.cpp
-# ^^ default chat prompt with the OpenLLaMA model
+$ pkgx +brewkit -- run llama.cpp
+# ^^ default chat prompt with an appropriate hugging face model
 ```

-If you want to run `llama.cpp` with your own args specify them and chat mode
-will be skipped.
-
-If you want to use a different model specify `--model`.
+If you want to run `llama.cpp` with your own args `pkgx llama.cpp $ARGS` is
+your friend.

 # converting your own models

@@ -16,5 +14,7 @@ We provide a working `convert.py` from the llama.cpp project. To use it you
 need to launch it via a tea pkgenv:

 ```sh
-tea +github.com/ggerganov/llama.cpp convert.py path/to/your/model
+pkgx +llama.cpp -- convert.py path/to/your/model
+# ^^ the -- is necessary since `convert.py` is not listed in the llama.cpp
+# provides list
 ```
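For the "your own args" route in the README above, the raw binary takes standard llama.cpp flags directly. A minimal sketch, using only flags that already appear in the scripts in this commit; the model path shown is where the new entrypoint below caches its download, but any local GGUF file works:

```sh
# run the raw executable with custom args (skips the chat entrypoint)
pkgx llama.cpp \
  --model "${XDG_DATA_HOME:-$HOME/.local/share}"/llama.cpp/dolphin-2.1-mistral-7b.Q4_0.gguf \
  -n 256 \
  --color \
  --interactive \
  --reverse-prompt "User:"
```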
projects/github.com/ggerganov/llama.cpp/entrypoint.sh (new executable file, 32 lines)

@@ -0,0 +1,32 @@
+#!/usr/bin/env -S pkgx +gum +aria2c bash
+
+set -eo pipefail
+test -n "$VERBOSE" && set -x
+
+MODEL_URL="https://huggingface.co/TheBloke/dolphin-2.1-mistral-7B-GGUF/resolve/main/dolphin-2.1-mistral-7b.Q4_0.gguf"
+MODEL_FILENAME=$(basename "$MODEL_URL")
+MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}"/llama.cpp
+
+if [ ! -f "$MODEL_DIR/$MODEL_FILENAME" ]; then
+  gum format <<EoMD
+# downloading $MODEL_FILENAME
+models will be placed: \`$MODEL_DIR\`
+> this may take a few minutes…
+EoMD
+  echo #spacer
+  mkdir -p "$MODEL_DIR"
+  aria2c "$MODEL_URL" --dir="$MODEL_DIR"
+  gum format "# All done!"
+  echo #spacer
+fi
+
+D="$(cd "$(dirname "$0")" && pwd)"
+
+exec "$D"/bin/llama.cpp \
+  --model "$MODEL_DIR/$MODEL_FILENAME" \
+  -n 256 \
+  --repeat_penalty 1.0 \
+  --color \
+  --interactive \
+  --reverse-prompt "User:" \
+  --file "$D"/share/prompts/chat-with-bob.txt
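A usage note on the entrypoint above: the download only happens when the GGUF file is missing from `$MODEL_DIR`, so later runs go straight to the interactive chat prompt. Assuming the default XDG paths and the invocation from the commit message:

```sh
# first run downloads the model with aria2c; repeat runs reuse the cached file
pkgx +brewkit -- run llama.cpp

# the cached model lives in the data dir the script derives from XDG_DATA_HOME
ls "${XDG_DATA_HOME:-$HOME/.local/share}"/llama.cpp/

# VERBOSE is honored via `set -x` if the launch needs debugging
VERBOSE=1 pkgx +brewkit -- run llama.cpp
```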
@@ -1,31 +0,0 @@
-#!/bin/sh
-
-set -e
-test -n "$VERBOSE" && set -x
-
-if test -f "$1"/VERSION && test $(cat "$1"/VERSION) = $2; then
-  exit
-fi
-
-mkdir -p "$1"
-cd "$1"
-
-tea gum format <<EoMD
-# downloading OpenLLaMA 3Bv2
-models will be placed: \`$PWD\`
-> this may take a a few minutes…
-EoMD
-
-tea wget \
-  --continue \
-  --output-document OpenLLaMA-3Bv2.ggml.f16.bin \
-  'https://huggingface.co/SlyEcho/open_llama_3b_v2_ggml/resolve/main/open-llama-3b-v2-f16.bin'
-
-tea wget \
-  --continue \
-  'https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model'
-
-echo $2 > VERSION
-
-tea gum format "# All done!"
-echo #spacer
@@ -1,37 +0,0 @@
-#!/bin/sh
-
-set -e
-test -n "$VERBOSE" && set -x
-
-D="$(cd "$(dirname "$0")"/.. && pwd)"
-VERSION="$(basename "$D")"
-MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/models/OpenLLaMA"
-
-"$D"/libexec/llama-fetch "$MODEL_DIR" "$VERSION"
-
-if [ "$1" = '--fetch' ]; then
-  exit
-fi
-
-if [ $# -eq 0 ]; then
-  exec "$D"/libexec/llama.cpp \
-    --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin" \
-    --ctx-size 512 \
-    --batch-size 1024 \
-    --n-predict 256 \
-    --keep 48 \
-    --repeat_penalty 1.0 \
-    --color \
-    --interactive \
-    --reverse-prompt "User:" \
-    --file "$D"/share/prompts/chat-with-bob.txt
-fi
-
-# detect --model arg or not
-for arg in "$@"; do
-  if [ "$arg" = "--model" -o "$arg" = "-m" ]; then
-    exec "$D"/libexec/llama.cpp "$@"
-  fi
-done
-
-exec "$D"/libexec/llama.cpp "$@" --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin"
@@ -1,9 +1,10 @@
 distributable:
-  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/master-fff0e0e.tar.gz
+  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/b{{version.raw}}.tar.gz
   strip-components: 1

 versions:
-  - 2023.07.20
+  github: ggerganov/llama.cpp/tags
+  strip: /^b/

 display-name:
   LLaMA.cpp

@@ -20,7 +21,7 @@ platforms:

 dependencies:
   python.org: ~3.11
-  tea.xyz: ^0 # the scripts use tea/cli
+  pkgx.sh: ^1

 build:
   dependencies:

@@ -28,28 +29,29 @@ build:
   env:
     VIRTUAL_ENV: ${{prefix}}/venv
   script:
-    - make --jobs {{hw.concurrency}}
-
     - |
+      make --jobs {{hw.concurrency}}
-      install -D main {{prefix}}/libexec/llama.cpp
-      install -D props/llama.cpp {{prefix}}/bin/llama.cpp
+      install -D main {{prefix}}/bin/llama.cpp
+      install -D props/entrypoint.sh {{prefix}}/entrypoint.sh
       install -D ggml-metal.metal {{prefix}}/bin/ggml-metal.metal

     - |
       mkdir -p {{prefix}}/share
       mv prompts {{prefix}}/share
-      mv props/llama-fetch {{prefix}}/libexec

     - |
       install -D convert.py $VIRTUAL_ENV/bin/convert.py
       python-venv-stubber.sh convert.py

-    - python -m venv $VIRTUAL_ENV
     - |
+      python -m venv $VIRTUAL_ENV
       source $VIRTUAL_ENV/bin/activate
       pip install -r requirements.txt
       deactivate

-test: |
-  {{prefix}}/libexec/llama.cpp --help
-  # testing more than this requires downloading the models 😬
+test:
+  '{{prefix}}/bin/llama.cpp --help'
+  # ^^ testing more than this requires downloading the models 😬

-entrypoint: llama.cpp
+entrypoint: ./entrypoint.sh
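On the `versions` change above: tags are now read from `ggerganov/llama.cpp/tags`, the leading `b` is stripped (`strip: /^b/`) to form the package version, and the distributable URL puts it back via `b{{version.raw}}`. A rough shell illustration of that mapping, using a made-up tag number:

```sh
# hypothetical tag; real values come from the GitHub tags list
tag="b1234"
version="${tag#b}"   # strip the /^b/ prefix -> version.raw is "1234"
echo "https://github.com/ggerganov/llama.cpp/archive/refs/tags/b${version}.tar.gz"
```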