forked from abetlen/llama-cpp-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
31 lines (18 loc) · 755 Bytes
/
server.py
File metadata and controls
31 lines (18 loc) · 755 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
"""llama-cpp-python server from scratch in a single file.
"""
# import llama_cpp
# path = b"../../models/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf"
# model_params = llama_cpp.llama_model_default_params()
# model = llama_cpp.llama_load_model_from_file(path, model_params)
# if model is None:
# raise RuntimeError(f"Failed to load model from file: {path}")
# ctx_params = llama_cpp.llama_context_default_params()
# ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)
# if ctx is None:
# raise RuntimeError("Failed to create context")
from fastapi import FastAPI
app = FastAPI()
import openai.types.chat as types
@app.post("/v1/chat/completions")
def create_chat_completions():
return {"message": "Hello World"}