mirror of https://github.com/ivabus/pantry (synced 2024-11-22 08:25:07 +03:00)
update llama.cpp; use OpenLLaMA (#2655)
commit 7c803208a2 (parent b8428969ec)

6 changed files with 92 additions and 81 deletions
@@ -1,3 +1,6 @@
+warnings:
+  - deprecated
+
 distributable:
   url: https://github.com/antimatter15/alpaca.cpp/archive/refs/tags/81bd894.tar.gz
   strip-components: 1
projects/github.com/ggerganov/llama.cpp/README.md (new file, 20 lines)
@@ -0,0 +1,20 @@
+# getting started
+
+```sh
+$ llama.cpp
+# ^^ default chat prompt with the OpenLLaMA model
+```
+
+If you want to run `llama.cpp` with your own args specify them and chat mode
+will be skipped.
+
+If you want to use a different model specify `--model`.
+
+# converting your own models
+
+We provide a working `convert.py` from the llama.cpp project. To use it you
+need to launch it via a tea pkgenv:
+
+```sh
+tea +github.com/ggerganov/llama.cpp convert.py path/to/your/model
+```
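In practice that means custom arguments skip chat mode while the wrapper still supplies the bundled model, and `--model` points it elsewhere. An illustrative example (the prompt text, token count, and model path are placeholders; `--prompt` and `--n-predict` are standard llama.cpp flags):

```sh
# own args given: chat mode is skipped, the default OpenLLaMA model is used
llama.cpp --prompt "The capital of France is" --n-predict 64

# different model: pass --model yourself
llama.cpp --model path/to/another-model.ggml.f16.bin --prompt "Hello"
```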
@@ -7,50 +7,23 @@ if test -f "$1"/VERSION && test $(cat "$1"/VERSION) = $2; then
   exit
 fi
 
-if command -v git >/dev/null; then
-  GIT="tea git"
-else
-  GIT=git
-fi
-
 mkdir -p "$1"
 cd "$1"
 
-tea gum format "# preparing for model fetch"
-echo # spacer
-
-if test -d trackers; then
-  $GIT -C trackers fetch origin
-  $GIT -C trackers reset --hard origin/master
-else
-  $GIT clone "https://github.com/ngosang/trackerslist" trackers
-fi
-
-TRACKERS=$(grep -v '^#' "trackers/trackers_all.txt" | tr '\n' ',')
-
 tea gum format <<EoMD
-# downloading 4b quantized LLaMA (7B) model
+# downloading OpenLLaMA 3Bv2
 models will be placed: \`$PWD\`
 > this may take a a few minutes…
 EoMD
 
-# --dir is .. coz the torrent is prefixed with a LLaMA directory
-tea aria2c \
-  --dir=.. \
-  --seed-time=0 \
-  --bt-tracker="$TRACKERS" \
-  --summary-interval=0 \
-  --check-integrity \
-  'magnet:?xt=urn:btih:FLWZABA5EDZXVC45YHEMX6W55O6S4HXK'
-
-tea gum format "# finalizing model"
-echo #spacer
-
-tea +google.com/sentencepiece upgrade-model.py 7B tokenizer.model
-
-rm 7B/ggml-model-q4_0.bin
-mv 7B/ggml-model-q4_0.bin.tmp 7B/ggml-model-q4_0.bin
+tea wget \
+  --continue \
+  --output-document OpenLLaMA-3Bv2.ggml.f16.bin \
+  'https://huggingface.co/SlyEcho/open_llama_3b_v2_ggml'
+
+tea wget \
+  --continue \
+  'https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model'
 
 echo $2 > VERSION
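Pieced together, the fetch script (props/llama-fetch, going by the package.yml hunk further down) now reduces to roughly this sketch; the gum banner is omitted, and `$1`/`$2` are the model directory and version, as in the guard shown in the hunk header:

```sh
# skip the download when this version's model is already in place
if test -f "$1"/VERSION && test "$(cat "$1"/VERSION)" = "$2"; then
  exit
fi

mkdir -p "$1"
cd "$1"

# resume-friendly downloads of the f16 GGML weights and the tokenizer
tea wget --continue \
  --output-document OpenLLaMA-3Bv2.ggml.f16.bin \
  'https://huggingface.co/SlyEcho/open_llama_3b_v2_ggml'
tea wget --continue \
  'https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model'

# stamp the version so the next run is a no-op
echo "$2" > VERSION
```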
@@ -5,25 +5,33 @@ test -n "$VERBOSE" && set -x
 
 D="$(cd "$(dirname "$0")"/.. && pwd)"
 VERSION="$(basename "$D")"
-MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/models/LLaMA"
+MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/models/OpenLLaMA"
 
-export PATH="$D/tbin:$PATH"
+"$D"/libexec/llama-fetch "$MODEL_DIR" "$VERSION"
 
-llama-fetch "$MODEL_DIR" "$VERSION"
+if [ "$1" = '--fetch' ]; then
+  exit
+fi
 
-if test $1 = chat; then
-  exec "$D"/tbin/llama.cpp \
-    --model "$MODEL_DIR"/7B/ggml-model-q4_0.bin \
-    -n 256 \
+if [ $# -eq 0 ]; then
+  exec "$D"/libexec/llama.cpp \
+    --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin" \
+    --ctx-size 512 \
+    --batch-size 1024 \
+    --n-predict 256 \
+    --keep 48 \
     --repeat_penalty 1.0 \
     --color \
-    -i \
-    -r \
-    "User:" \
-    -f "$D"/share/prompts/chat-with-bob.txt
-else
-  exec "$D"/tbin/llama.cpp \
-    --color \
-    --model "$MODEL_DIR"/7B/ggml-model-q4_0.bin \
-    "$@"
+    --interactive \
+    --reverse-prompt "User:" \
+    --file "$D"/share/prompts/chat-with-bob.txt
 fi
 
+# detect --model arg or not
+for arg in "$@"; do
+  if [ "$arg" = "--model" -o "$arg" = "-m" ]; then
+    exec "$D"/libexec/llama.cpp "$@"
+  fi
+done
+
+exec "$D"/libexec/llama.cpp "$@" --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin"
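Read together with the fetch script, the entrypoint wrapper (props/llama.cpp) now routes invocations roughly like this; the commands are illustrative, and `-p`/`--prompt` is llama.cpp's usual prompt flag:

```sh
llama.cpp                          # no args: fetch if needed, then interactive chat
llama.cpp --fetch                  # download/refresh the model only, then exit
llama.cpp -p "Once upon a time"    # own args: --model is appended automatically
llama.cpp --model other.bin -p hi  # --model (or -m) present: args pass through as-is
```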
@@ -1,39 +1,54 @@
 distributable:
-  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/master-8b67998.tar.gz
+  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/master-fff0e0e.tar.gz
   strip-components: 1
 
 versions:
-  - 2023.04.11
+  - 2023.07.20
 
 provides:
   - bin/llama.cpp
+  # NOTE! we do not “provide” convert.py. ∵ it’s too generic
+  # do `tea +github.com/ggerganov/llama.cpp convert.py`
+
+platforms:
+  - linux
+  - darwin/aarch64
+  # Illegal instruction: 4 on darwin/x86-64
+
+dependencies:
+  python.org: ^3.11
+  tea.xyz: ^0 # the scripts use tea/cli
 
 build:
   dependencies:
     tea.xyz/gx/cc: c99
     tea.xyz/gx/make: '*'
-    freedesktop.org/pkg-config: ~0.29
-    gnu.org/wget: '*'
-  script: |
-    mkdir -p {{prefix}}/bin {{prefix}}/tbin {{prefix}}/share
-
-    make
-    mv main {{prefix}}/tbin/llama.cpp
-
-    mv props/llama.cpp {{prefix}}/bin
-    mv props/llama-fetch {{prefix}}/tbin
-    mv $SRCROOT/prompts {{prefix}}/share
-    mv $SRCROOT/*.py {{prefix}}/tbin
-    mv quantize {{prefix}}/tbin/quantize
-
-    wget \
-      --no-check-certificate \
-      'https://gist.githubusercontent.com/eiz/828bddec6162a023114ce19146cb2b82/raw/6b1d2b192815e6d61386a9a8853f2c3293b3f568/gistfile1.txt' \
-      -O {{prefix}}/tbin/upgrade-model.py
-    chmod +x {{prefix}}/tbin/upgrade-model.py
+    gnu.org/coreutils: '*'
+  env:
+    VIRTUAL_ENV: ${{prefix}}/venv
+  script:
+    - |
+      make --jobs {{hw.concurrency}}
+      install -D main {{prefix}}/libexec/llama.cpp
+      install -D props/llama.cpp {{prefix}}/bin/llama.cpp
+    - |
+      mkdir -p {{prefix}}/share
+      mv prompts {{prefix}}/share
+      mv props/llama-fetch {{prefix}}/libexec
+    - |
+      install -D convert.py $VIRTUAL_ENV/bin/convert.py
+      python-venv-stubber.sh convert.py
+    - python -m venv $VIRTUAL_ENV
+    - |
+      source $VIRTUAL_ENV/bin/activate
+      pip install -r requirements.txt
+      deactivate
 
 test: |
-  {{prefix}}/tbin/llama.cpp --help
+  {{prefix}}/libexec/llama.cpp --help
   # testing more than this requires downloading the models 😬
+
+entrypoint: llama.cpp
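Since convert.py is now installed into the package's private virtualenv and stubbed, converting a checkpoint follows the README usage. A hedged example (the paths are placeholders, and `--outtype`/`--outfile` are convert.py options in llama.cpp of this vintage, not something this package adds):

```sh
# convert a local LLaMA-style checkpoint to f16 GGML for use with --model
tea +github.com/ggerganov/llama.cpp convert.py \
  path/to/your/model \
  --outtype f16 \
  --outfile path/to/your/model.ggml.f16.bin
```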
@@ -1,8 +0,0 @@
-[project]
-requires-python = ">=3.7"
-name = "llama.cpp"
-version = '0.0.0'
-
-dependencies = [
-  "sentencepiece>=0.1.96"
-]