mirror of
https://github.com/mudler/LocalAI
synced 2026-04-21 13:27:21 +00:00
chore: ⬆️ Update TheTom/llama-cpp-turboquant to `45f8a066ed5f5bb38c695cec532f6cef9f4efa9d` (#9385)
chore: ⬆️ Update TheTom/llama-cpp-turboquant to `45f8a066ed5f5bb38c695cec532f6cef9f4efa9d`
Drop 0002-ggml-rpc-bump-op-count-to-97.patch; the fork now has
GGML_OP_COUNT == 97 and RPC_PROTO_PATCH_VERSION 2 upstream.
Fetch all tags in backend/cpp/llama-cpp/Makefile so tag-only commits
(the new turboquant pin is reachable only through the tag
feature-turboquant-kv-cache-b8821-45f8a06) can be checked out.
This commit is contained in:
parent
b6a68e5df4
commit
5837b14888
3 changed files with 2 additions and 15 deletions
|
|
@@ -132,7 +132,7 @@ llama.cpp:
|
|||
cd llama.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(LLAMA_REPO) && \
|
||||
git fetch origin && \
|
||||
git fetch --all --tags && \
|
||||
git checkout -b build $(LLAMA_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,7 @@
|
|||
|
||||
# Pinned to the HEAD of feature/turboquant-kv-cache on https://github.com/TheTom/llama-cpp-turboquant.
|
||||
# Auto-bumped nightly by .github/workflows/bump_deps.yaml.
|
||||
TURBOQUANT_VERSION?=8590cbff961dbaf1d3a9793fd11d402e248869b9
|
||||
TURBOQUANT_VERSION?=45f8a066ed5f5bb38c695cec532f6cef9f4efa9d
|
||||
LLAMA_REPO?=https://github.com/TheTom/llama-cpp-turboquant
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
|
|
|||
|
|
@@ -1,13 +0,0 @@
|
|||
diff --git a/ggml/include/ggml-rpc.h b/ggml/include/ggml-rpc.h
|
||||
index 1c11495..31af239 100644
|
||||
--- a/ggml/include/ggml-rpc.h
|
||||
+++ b/ggml/include/ggml-rpc.h
|
||||
@@ -11,7 +11,7 @@ extern "C" {
|
||||
#define RPC_PROTO_PATCH_VERSION 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
-static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT has changed - update RPC_PROTO_PATCH_VERSION");
|
||||
+static_assert(GGML_OP_COUNT == 97, "GGML_OP_COUNT has changed - update RPC_PROTO_PATCH_VERSION");
|
||||
#endif
|
||||
|
||||
#define GGML_RPC_MAX_SERVERS 16
|
||||
Loading…
Reference in a new issue