GitHub.com/ggerganov/llama.cpp update (#3696)

* llama.cpp: track the GitHub tags instead of a hardcoded version
* llama.cpp: if a model is specified, run with it; if not, download a default model first
* Use the entrypoint for the custom llama.cpp invocation
* `llama.cpp` itself is now just the raw executable; I think this is our new pattern
* To run chat, use the entrypoint: `pkgx +brewkit -- run llama.cpp` (see the sketch below)
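A minimal sketch of the two invocation styles described above (the chat command is from this change; the model path in the second command is a hypothetical example):

```sh
# chat mode: the entrypoint downloads a default GGUF model on first use, then starts an interactive chat
pkgx +brewkit -- run llama.cpp

# raw executable: pass your own arguments, including your own model (path is hypothetical)
pkgx llama.cpp --model path/to/your-model.gguf --interactive
```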

Co-authored-by: James Reynolds <magnsuviri@me.com>
Co-authored-by: Max Howell <mxcl@me.com>
James Reynolds 2023-10-26 05:24:04 -06:00 committed by GitHub
parent d3ba600cd5
commit 2b06942c62
5 changed files with 53 additions and 87 deletions

README.md

@@ -1,14 +1,12 @@
# getting started
```sh
$ llama.cpp
# ^^ default chat prompt with the OpenLLaMA model
$ pkgx +brewkit -- run llama.cpp
# ^^ default chat prompt with an appropriate hugging face model
```
If you want to run `llama.cpp` with your own args specify them and chat mode
will be skipped.
If you want to use a different model specify `--model`.
If you want to run `llama.cpp` with your own args `pkgx llama.cpp $ARGS` is
your friend.
# converting your own models
@@ -16,5 +14,7 @@ We provide a working `convert.py` from the llama.cpp project. To use it you
need to launch it via a tea pkgenv:
```sh
tea +github.com/ggerganov/llama.cpp convert.py path/to/your/model
pkgx +llama.cpp -- convert.py path/to/your/model
# ^^ the -- is necessary since `convert.py` is not listed in the llama.cpp
# provides list
```
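For reference, a hedged end-to-end sketch of the conversion workflow described above; the output filename is an assumption, since `convert.py` derives it from the model directory and output type:

```sh
# convert a local model directory to a llama.cpp-compatible file
# (the -- is needed because convert.py is not in the provides list)
pkgx +llama.cpp -- convert.py path/to/your/model

# then run chat against the converted model (output filename assumed; check convert.py's output)
pkgx llama.cpp --model path/to/your/model/ggml-model-f16.gguf --interactive
```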

entrypoint.sh (new file)

@@ -0,0 +1,32 @@
#!/usr/bin/env -S pkgx +gum +aria2c bash
set -eo pipefail
test -n "$VERBOSE" && set -x
MODEL_URL="https://huggingface.co/TheBloke/dolphin-2.1-mistral-7B-GGUF/resolve/main/dolphin-2.1-mistral-7b.Q4_0.gguf"
MODEL_FILENAME=$(basename "$MODEL_URL")
MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}"/llama.cpp
if [ ! -f "$MODEL_DIR/$MODEL_FILENAME" ]; then
gum format <<EoMD
# downloading $MODEL_FILENAME
the model will be placed in: \`$MODEL_DIR\`
> this may take a few minutes…
EoMD
echo #spacer
mkdir -p "$MODEL_DIR"
aria2c "$MODEL_URL" --dir="$MODEL_DIR"
gum format "# All done!"
echo #spacer
fi
D="$(cd "$(dirname "$0")" && pwd)"
exec "$D"/bin/llama.cpp \
--model "$MODEL_DIR/$MODEL_FILENAME" \
-n 256 \
--repeat_penalty 1.0 \
--color \
--interactive \
--reverse-prompt "User:" \
--file "$D"/share/prompts/chat-with-bob.txt
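The download above is one-shot: the GGUF file is cached under the XDG data directory, so later runs skip aria2c entirely. A small sketch of how that cache behaves (paths taken from the script above):

```sh
# where the entrypoint stores the downloaded model (XDG_DATA_HOME defaults to ~/.local/share)
MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/llama.cpp"
ls "$MODEL_DIR"    # after the first run: dolphin-2.1-mistral-7b.Q4_0.gguf

# deleting the cached file makes the entrypoint re-download it on the next run
rm "$MODEL_DIR/dolphin-2.1-mistral-7b.Q4_0.gguf"
```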

llama-fetch (deleted)

@@ -1,31 +0,0 @@
#!/bin/sh
set -e
test -n "$VERBOSE" && set -x
if test -f "$1"/VERSION && test $(cat "$1"/VERSION) = $2; then
exit
fi
mkdir -p "$1"
cd "$1"
tea gum format <<EoMD
# downloading OpenLLaMA 3Bv2
models will be placed: \`$PWD\`
> this may take a few minutes…
EoMD
tea wget \
--continue \
--output-document OpenLLaMA-3Bv2.ggml.f16.bin \
'https://huggingface.co/SlyEcho/open_llama_3b_v2_ggml/resolve/main/open-llama-3b-v2-f16.bin'
tea wget \
--continue \
'https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model'
echo $2 > VERSION
tea gum format "# All done!"
echo #spacer

llama.cpp (deleted)

@@ -1,37 +0,0 @@
#!/bin/sh
set -e
test -n "$VERBOSE" && set -x
D="$(cd "$(dirname "$0")"/.. && pwd)"
VERSION="$(basename "$D")"
MODEL_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/models/OpenLLaMA"
"$D"/libexec/llama-fetch "$MODEL_DIR" "$VERSION"
if [ "$1" = '--fetch' ]; then
exit
fi
if [ $# -eq 0 ]; then
exec "$D"/libexec/llama.cpp \
--model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin" \
--ctx-size 512 \
--batch-size 1024 \
--n-predict 256 \
--keep 48 \
--repeat_penalty 1.0 \
--color \
--interactive \
--reverse-prompt "User:" \
--file "$D"/share/prompts/chat-with-bob.txt
fi
# detect --model arg or not
for arg in "$@"; do
if [ "$arg" = "--model" -o "$arg" = "-m" ]; then
exec "$D"/libexec/llama.cpp "$@"
fi
done
exec "$D"/libexec/llama.cpp "$@" --model "$MODEL_DIR/OpenLLaMA-3Bv2.ggml.f16.bin"

package.yml

@@ -1,9 +1,10 @@
distributable:
url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/master-fff0e0e.tar.gz
url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/b{{version.raw}}.tar.gz
strip-components: 1
versions:
- 2023.07.20
github: ggerganov/llama.cpp/tags
strip: /^b/
display-name:
LLaMA.cpp
@@ -20,7 +21,7 @@ platforms:
dependencies:
python.org: ~3.11
tea.xyz: ^0 # the scripts use tea/cli
pkgx.sh: ^1
build:
dependencies:
@@ -28,28 +29,29 @@ build:
env:
VIRTUAL_ENV: ${{prefix}}/venv
script:
- make --jobs {{hw.concurrency}}
- |
make --jobs {{hw.concurrency}}
install -D main {{prefix}}/libexec/llama.cpp
install -D props/llama.cpp {{prefix}}/bin/llama.cpp
install -D main {{prefix}}/bin/llama.cpp
install -D props/entrypoint.sh {{prefix}}/entrypoint.sh
install -D ggml-metal.metal {{prefix}}/bin/ggml-metal.metal
- |
mkdir -p {{prefix}}/share
mv prompts {{prefix}}/share
mv props/llama-fetch {{prefix}}/libexec
- |
install -D convert.py $VIRTUAL_ENV/bin/convert.py
python-venv-stubber.sh convert.py
- python -m venv $VIRTUAL_ENV
- |
python -m venv $VIRTUAL_ENV
source $VIRTUAL_ENV/bin/activate
pip install -r requirements.txt
deactivate
test: |
{{prefix}}/libexec/llama.cpp --help
# testing more than this requires downloading the models 😬
test:
'{{prefix}}/bin/llama.cpp --help'
# ^^ testing more than this requires downloading the models 😬
entrypoint: llama.cpp
entrypoint: ./entrypoint.sh
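The `versions` stanza now scrapes upstream GitHub tags and strips their `b` prefix, which the `distributable` URL adds back. A minimal sketch of that round-trip, using a hypothetical tag:

```sh
# hypothetical upstream tag -> version -> source tarball URL
tag="b1429"                     # llama.cpp release tags look like bNNNN (example tag, assumed)
version="${tag#b}"              # what `strip: /^b/` yields: 1429
echo "https://github.com/ggerganov/llama.cpp/archive/refs/tags/b${version}.tar.gz"
```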