From 54c7f9a09f1b8ce5d8c5feb759946dae45e1d8dc Mon Sep 17 00:00:00 2001
From: Hermes Agent
Date: Mon, 11 May 2026 13:46:26 +0800
Subject: [PATCH 1/2] feat: add llama.cpp framework (b8981)

---
 frameworks/llama.cpp/b8981/Dockerfile |  56 ++++++++++++++++
 frameworks/llama.cpp/b8981/README.md  | 107 ++++++++++++++++++++++++++++
 frameworks/llama.cpp/b8981/build.conf |   4 ++
 frameworks/llama.cpp/b8981/test.sh    |  64 ++++++++++++++++++
 4 files changed, 231 insertions(+)
 create mode 100644 frameworks/llama.cpp/b8981/Dockerfile
 create mode 100644 frameworks/llama.cpp/b8981/README.md
 create mode 100644 frameworks/llama.cpp/b8981/build.conf
 create mode 100755 frameworks/llama.cpp/b8981/test.sh

diff --git a/frameworks/llama.cpp/b8981/Dockerfile b/frameworks/llama.cpp/b8981/Dockerfile
new file mode 100644
index 0000000..0f46be7
--- /dev/null
+++ b/frameworks/llama.cpp/b8981/Dockerfile
@@ -0,0 +1,56 @@
+# llama.cpp (GPU) on OpenCloudOS 9
+FROM opencloudos/opencloudos9-cuda-devel:12.8
+
+LABEL maintainer="OpenCloudOS Community"
+LABEL org.opencontainers.image.source="https://gitee.com/OpenCloudOS/ai-agent-container"
+LABEL org.opencontainers.image.description="llama.cpp b8981 (GPU/CUDA) on OpenCloudOS 9"
+
+# Install the build toolchain, Python 3.11, and file(1), which test.sh relies on
+RUN dnf install -y \
+    cmake \
+    file \
+    gcc \
+    gcc-c++ \
+    git \
+    python3.11 \
+    python3.11-pip \
+    make \
+    && dnf clean all \
+    && rm -rf /var/cache/yum/* \
+    && ln -sf /usr/bin/python3.11 /usr/bin/python3
+
+# Put the CUDA toolchain on PATH so cmake can find nvcc
+ENV PATH="/usr/local/cuda/bin:${PATH}"
+
+# Pin the CUDA target architectures (docker build has no GPU, so "native" cannot be detected)
+# 80=Ampere(A100), 86=Ampere(RTX30xx/A40), 89=Ada(RTX40xx/L4/L40), 90=Hopper(H100)
+# GGML_CUDA_NO_VMM=ON: the build environment has no libcuda.so.1 (CUDA driver library);
+# disabling VMM (virtual memory management) avoids link errors, and the host supplies the driver at runtime
+ENV CMAKE_CUDA_ARCHITECTURES="80;86;89;90"
+
+RUN git clone https://github.com/ggml-org/llama.cpp.git /opt/llama.cpp \
+    && cd /opt/llama.cpp \
+    && git checkout b8981 \
+    && cmake -B build \
+        -DGGML_CUDA=ON \
+        -DGGML_CUDA_NO_VMM=ON \
+        -DCMAKE_CUDA_ARCHITECTURES="80;86;89;90" \
+        -DCMAKE_BUILD_TYPE=Release \
+    && cmake --build build --config Release -j$(nproc) \
+    && cmake --install build \
+    && install -m 755 build/bin/llama-cli build/bin/llama-server /usr/local/bin/ \
+    && ls -la /usr/local/bin/llama-* \
+    && ldconfig \
+    && rm -rf /opt/llama.cpp/build
+
+# Install the Python binding (CUDA enabled; NO_VMM is needed here as well).
+# --timeout 120 and --retries 5 harden pip against transient network failures.
+RUN CMAKE_ARGS="-DGGML_CUDA=ON -DGGML_CUDA_NO_VMM=ON -DCMAKE_CUDA_ARCHITECTURES=80\;86\;89\;90" \
+    python3.11 -m pip install --no-cache-dir --timeout 120 --retries 5 llama-cpp-python
+
+# Expose all host GPUs to the container by default
+ENV NVIDIA_VISIBLE_DEVICES=all
+
+RUN echo $(date +"%Y-%m-%dT%H:%M:%S%z") > /opencloudos_build_date.txt
+
+CMD ["python3.11"]
\ No newline at end of file
diff --git a/frameworks/llama.cpp/b8981/README.md b/frameworks/llama.cpp/b8981/README.md
new file mode 100644
index 0000000..defd43c
--- /dev/null
+++ b/frameworks/llama.cpp/b8981/README.md
@@ -0,0 +1,107 @@
+# llama.cpp on OpenCloudOS 9
+
+## Basic information
+- **Framework version**: b8981
+- **Source repository**: [ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
+- **Base image**: opencloudos/opencloudos9-cuda-devel:12.8
+- **Python version**: 3.11
+- **CUDA version**: 12.8
+- **Build options**: GGML_CUDA=ON, CMAKE_CUDA_ARCHITECTURES=80;86;89;90
+- **Supported GPU architectures**: Ampere (sm_80/sm_86), Ada Lovelace (sm_89), Hopper (sm_90)
+
+## Build
+
+```bash
+docker build -t oc9-llama-cpp:b8981 .
+```
+
+> The build compiles llama.cpp from source; it needs a reliable network connection to fetch dependencies and takes roughly 5-15 minutes.
+
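+To sanity-check the image without downloading a model, you can ask the
+binaries to identify themselves. A minimal smoke test, assuming this tag
+supports the standard `--version` flag of current llama.cpp releases:
+
+```bash
+# Prints the build number and commit (flag assumed supported in b8981);
+# confirms the binary starts and its shared libraries resolve
+docker run --rm --gpus all oc9-llama-cpp:b8981 llama-cli --version
+```
+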
+## Usage examples
+
+### Command-line inference (llama-cli)
+
+```bash
+# Download a model (example: Qwen2.5-0.5B)
+wget https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_k_m.gguf -O /tmp/model.gguf
+
+# Run inference
+docker run --rm --gpus all \
+  -v /tmp/model.gguf:/models/model.gguf \
+  oc9-llama-cpp:b8981 \
+  llama-cli -m /models/model.gguf -p "Hello, please introduce yourself" -n 256
+```
+
+### API service (llama-server)
+
+```bash
+# Start the inference server
+docker run --rm --gpus all \
+  -p 8080:8080 \
+  -v /tmp/model.gguf:/models/model.gguf \
+  oc9-llama-cpp:b8981 \
+  llama-server -m /models/model.gguf --host 0.0.0.0 --port 8080
+
+# Query the server
+curl http://localhost:8080/v1/chat/completions \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "model": "model",
+    "messages": [{"role": "user", "content": "Hello"}],
+    "max_tokens": 128
+  }'
+```
+
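+The server loads the model asynchronously, so a request sent right after
+startup can be rejected with HTTP 503. llama-server exposes a `/health`
+endpoint that returns 200 once the model is loaded; a short wait loop avoids the race:
+
+```bash
+# Block until llama-server reports ready, then start sending requests
+until curl -sf http://localhost:8080/health >/dev/null; do
+  sleep 1
+done
+echo "llama-server is ready"
+```
+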
" +docker run --rm --gpus all "$IMAGE" python3 -c " +from llama_cpp import Llama +print('llama-cpp-python 导入成功') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo "=== 所有测试通过 ===" -- Gitee From f17d1b2eca6ff21f64d01c91c10741e6820bd468 Mon Sep 17 00:00:00 2001 From: samblade <812101+samblade@user.noreply.gitee.com> Date: Mon, 11 May 2026 07:39:55 +0000 Subject: [PATCH 2/2] update frameworks/llama.cpp/b8981/build.conf. Signed-off-by: samblade <812101+samblade@user.noreply.gitee.com> --- frameworks/llama.cpp/b8981/build.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frameworks/llama.cpp/b8981/build.conf b/frameworks/llama.cpp/b8981/build.conf index d746c07..f64caa6 100644 --- a/frameworks/llama.cpp/b8981/build.conf +++ b/frameworks/llama.cpp/b8981/build.conf @@ -1,4 +1,4 @@ # llama.cpp b8981 on OpenCloudOS 9 (GPU/CUDA) IMAGE_NAME=oc9-llama-cpp IMAGE_TAG=b8981 -GPU_TEST=true +GPU_TEST=false -- Gitee