Comparing changes

base repository: m5stack/StackFlow
base: main
head repository: m5stack/StackFlow
compare: dev
Able to merge. These branches can be automatically merged.
  • 12 commits
  • 17 files changed
  • 2 contributors

Commits on Mar 18, 2025

  1. [fix] Fix python tokenizer server cannot exit

    LittleMouse committed Mar 18, 2025 · 3ad41bc
  2. aa6013d

Commits on Mar 20, 2025

  1. [update] Added logic to stop inference before unit exit

    LittleMouse committed Mar 20, 2025 · 5b1cdb1
  2. [update] Avoid using the same tokenizer service port for LLM and VLLM at the same time.

    LittleMouse committed Mar 20, 2025 · 9a2d338
  3. [update] move the main_vlm model config file

    LittleMouse committed Mar 20, 2025 · d5eb3c1
  4. [fix] Fix melotts model config

    LittleMouse committed Mar 20, 2025 · 9e95d23

Commits on Mar 21, 2025

  1. [fix] Fix model name

    LittleMouse committed Mar 21, 2025 · 9274c00
  2. a4e0cca
  3. [fix] inference_async error

    dianjixz committed Mar 21, 2025 · b0cce96
  4. [update] tokenizer

    dianjixz committed Mar 21, 2025 · d0b3a36

Commits on Mar 24, 2025

  1. [update] update llm_llm doc

    LittleMouse committed Mar 24, 2025 · 57e9f64

Commits on Mar 26, 2025

  1. [update] add whisper-base model

    LittleMouse committed Mar 26, 2025 · 78fe031
69 changes: 69 additions & 0 deletions doc/projects_llm_framework_doc/llm_llm_en.md
@@ -55,6 +55,75 @@ Response json:
- created: Message creation time, unix time.
- work_id: The successfully created work_id unit.

## inference

### streaming input

```json
{
"request_id": "2",
"work_id": "llm.1003",
"action": "inference",
"object": "llm.utf-8.stream",
"data": {
"delta": "What's ur name?",
"index": 0,
"finish": true
}
}
```

- object: The transmitted data type is `llm.utf-8.stream`, indicating streaming UTF-8 input from the user.
- delta: Segment data of the streaming input.
- index: Index of the segment in the streaming input.
- finish: A flag indicating whether the streaming input has completed.
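
For illustration, the sketch below splits a prompt into `llm.utf-8.stream` segments and sends them to the unit. It assumes the StackFlow JSON service is reachable over TCP at `localhost:10001` and accepts one JSON object per line; the helper name, port, and chunk size are assumptions for this example, not part of the protocol.

```python
# Minimal sketch (not from the repo): send a prompt as llm.utf-8.stream
# segments, newline-delimited JSON over TCP. Host/port and chunk size are
# assumptions; adjust them to match how your StackFlow unit is exposed.
import json
import socket

def send_streaming_inference(prompt, work_id="llm.1003", request_id="2",
                             host="localhost", port=10001, chunk=16):
    segments = [prompt[i:i + chunk] for i in range(0, len(prompt), chunk)]
    with socket.create_connection((host, port)) as sock:
        for idx, seg in enumerate(segments):
            payload = {
                "request_id": request_id,
                "work_id": work_id,
                "action": "inference",
                "object": "llm.utf-8.stream",
                "data": {
                    "delta": seg,
                    "index": idx,
                    # the last segment carries finish = true
                    "finish": idx == len(segments) - 1,
                },
            }
            sock.sendall((json.dumps(payload) + "\n").encode("utf-8"))

send_streaming_inference("What's ur name?")
```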

### non-streaming input

```json
{
"request_id": "2",
"work_id": "llm.1003",
"action": "inference",
"object": "llm.utf-8",
"data": "What's ur name?"
}
```

- object: The transmitted data type is `llm.utf-8`, indicating non-streaming UTF-8 input from the user.
- data: The full input text for non-streaming inference.

streaming response json:

```json
{"created":1742779468,"data":{"delta":"I am not","finish":false,"index":0},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779469,"data":{"delta":" a person,","finish":false,"index":1},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779469,"data":{"delta":" but I'm","finish":false,"index":2},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779470,"data":{"delta":" here to assist","finish":false,"index":3},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779471,"data":{"delta":" you with any","finish":false,"index":4},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779472,"data":{"delta":" questions or tasks","finish":false,"index":5},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779473,"data":{"delta":" you may have","finish":false,"index":6},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779474,"data":{"delta":". How can","finish":false,"index":7},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779474,"data":{"delta":" I help you","finish":false,"index":8},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779475,"data":{"delta":" today?","finish":false,"index":9},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779475,"data":{"delta":"","finish":true,"index":10},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
```
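
A receiving side can rebuild the streamed reply by concatenating the `delta` fields until `finish` is true. The sketch below is a minimal example of that, assuming replies arrive in index order as newline-delimited JSON on a file-like `reader` (for example, `sock.makefile()`).

```python
# Minimal sketch: reassemble an llm.utf-8.stream reply.
# Assumes one JSON object per line and that segments arrive in index order.
import json

def read_streaming_reply(reader, request_id="2"):
    parts = []
    for line in reader:
        msg = json.loads(line)
        if msg.get("request_id") != request_id:
            continue  # reply belongs to another request
        if msg["error"]["code"] != 0:
            raise RuntimeError(msg["error"]["message"])
        data = msg["data"]
        parts.append(data["delta"])
        if data["finish"]:
            break
    return "".join(parts)
```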

non-streaming response json:

```json
{
"created": 1742780120,
"data": "As an artificial intelligence, I don't have a name in the traditional sense. However, I am here to assist you with any questions or information you may need. How can I help you today?",
"error": {
"code": 0,
"message": ""
},
"object": "llm.utf-8",
"request_id": "2",
"work_id": "llm.1003"
}
```
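
Putting both directions together for the non-streaming case, the following sketch sends one `llm.utf-8` request and reads the single reply. As above, the TCP address and newline framing are assumptions made for the example.

```python
# Minimal sketch: one-shot (non-streaming) inference round trip.
import json
import socket

def inference_once(prompt, work_id="llm.1003", request_id="2",
                   host="localhost", port=10001):
    payload = {
        "request_id": request_id,
        "work_id": work_id,
        "action": "inference",
        "object": "llm.utf-8",
        "data": prompt,
    }
    with socket.create_connection((host, port)) as sock:
        sock.sendall((json.dumps(payload) + "\n").encode("utf-8"))
        reply = json.loads(sock.makefile().readline())
    if reply["error"]["code"] != 0:
        raise RuntimeError(reply["error"]["message"])
    return reply["data"]

print(inference_once("What's ur name?"))
```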

## link

Link the output of the upstream unit.
68 changes: 68 additions & 0 deletions doc/projects_llm_framework_doc/llm_llm_zh.md
@@ -55,6 +55,74 @@
- created: Message creation time, unix time.
- work_id: The successfully created work_id unit.

## inference

### Streaming input

```json
{
"request_id": "2",
"work_id": "llm.1003",
"action": "inference",
"object": "llm.utf-8.stream",
"data": {
"delta": "What's ur name?",
"index": 0,
"finish": true
}
}
```

- object: The transmitted data type is `llm.utf-8.stream`, indicating streaming UTF-8 input from the user.
- delta: Segment data of the streaming input.
- index: Index of the segment in the streaming input.
- finish: A flag indicating whether the streaming input has completed.

### Non-streaming input

```json
{
"request_id": "2",
"work_id": "llm.1003",
"action": "inference",
"object": "llm.utf-8",
"data": "What's ur name?"
}
```

- object: The transmitted data type is `llm.utf-8`, indicating non-streaming UTF-8 input from the user.
- data: The full input text for non-streaming inference.

streaming response json:

```json
{"created":1742779468,"data":{"delta":"I am not","finish":false,"index":0},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779469,"data":{"delta":" a person,","finish":false,"index":1},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779469,"data":{"delta":" but I'm","finish":false,"index":2},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779470,"data":{"delta":" here to assist","finish":false,"index":3},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779471,"data":{"delta":" you with any","finish":false,"index":4},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779472,"data":{"delta":" questions or tasks","finish":false,"index":5},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779473,"data":{"delta":" you may have","finish":false,"index":6},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779474,"data":{"delta":". How can","finish":false,"index":7},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779474,"data":{"delta":" I help you","finish":false,"index":8},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779475,"data":{"delta":" today?","finish":false,"index":9},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
{"created":1742779475,"data":{"delta":"","finish":true,"index":10},"error":{"code":0,"message":""},"object":"llm.utf-8.stream","request_id":"2","work_id":"llm.1003"}
```

non-streaming response json:

```json
{
"created": 1742780120,
"data": "As an artificial intelligence, I don't have a name in the traditional sense. However, I am here to assist you with any questions or information you may need. How can I help you today?",
"error": {
"code": 0,
"message": ""
},
"object": "llm.utf-8",
"request_id": "2",
"work_id": "llm.1003"
}
```

## link

Link the output of the upstream unit.
56 changes: 54 additions & 2 deletions projects/llm_framework/main_llm/scripts/llm-llm_tokenizer_auto.py
@@ -4,6 +4,43 @@
import json
import argparse
import sys
import base64

def replace_base64_in_jrpcobj(jrpc_obj):
"""
Traverse and replace specific structures in a JRpcObj object.
If {"type": "bytes.base64", "encode": "bmloYW8K"} is found, replace it with the decoded bytes b"nihao\n";
{"type": "str.base64", ...} nodes are likewise replaced with the decoded UTF-8 string.
"""
if isinstance(jrpc_obj, dict):
# If it is a dictionary, check if it meets the condition
if jrpc_obj.get("type") == "bytes.base64" and "encode" in jrpc_obj:
try:
# Attempt to decode and replace
decoded_value = base64.b64decode(jrpc_obj["encode"])
return decoded_value # Replace with the decoded value
except Exception as e:
# print(f"Decoding error: {e}")
return jrpc_obj # If decoding fails, return the original object
elif jrpc_obj.get("type") == "str.base64" and "encode" in jrpc_obj:
try:
# Attempt to decode and replace
decoded_value = base64.b64decode(jrpc_obj["encode"]).decode("utf-8")
return decoded_value # Replace with the decoded value
except Exception as e:
# print(f"Decoding error: {e}")
return jrpc_obj # If decoding fails, return the original object
else:
# If conditions are not met, recursively process the dictionary's values
return {
key: replace_base64_in_jrpcobj(value) for key, value in jrpc_obj.items()
}
elif isinstance(jrpc_obj, list):
# If it is a list, recursively process each element
return [replace_base64_in_jrpcobj(item) for item in jrpc_obj]
else:
# If it is another type, return it directly
return jrpc_obj


def send_msg(content, chunk_size=1024):
data = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
@@ -13,12 +50,26 @@ def send_msg(content, chunk_size=1024):
sys.stdout.write(data[-1]+'\n')
sys.stdout.flush()

def str2bool(value):
if isinstance(value, bool):
return value
if value.lower() in ('yes', 'true', 't', 'y', '1'):
return True
elif value.lower() in ('no', 'false', 'f', 'n', '0'):
return False
else:
raise argparse.ArgumentTypeError('Boolean value expected.')

args = argparse.ArgumentParser()
args.add_argument('--model_id', type=str, default='qwen2.5_coder_tokenizer')
args.add_argument('--content', type=str, default='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.')
args.add_argument('--trust_remote_code', type=str2bool, nargs='?', const=True, default=None)
args.add_argument('--use_fast', type=str2bool, nargs='?', const=False, default=None)
args = args.parse_args()
tokenizer = AutoTokenizer.from_pretrained(args.model_id)
if args.trust_remote_code is None or args.use_fast is None:
tokenizer = AutoTokenizer.from_pretrained(args.model_id)
else:
tokenizer = AutoTokenizer.from_pretrained(args.model_id, trust_remote_code=args.trust_remote_code, use_fast=args.use_fast)
JRpcResultObj = {
'jsonrpc': "2.0",
'result': {"bos_id": tokenizer.bos_token_id, "eos_id": tokenizer.eos_token_id},
@@ -34,7 +85,8 @@ def send_msg(content, chunk_size=1024):
else:
line += part
try:
JRpcObj = json.loads(line)
JRpcObjsrc = json.loads(line)
JRpcObj = replace_base64_in_jrpcobj(JRpcObjsrc)
RpcMethod = getattr(tokenizer, JRpcObj['method'])
if callable(RpcMethod):
result = RpcMethod(*JRpcObj['params'][0],**JRpcObj['params'][1])
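
As a hedged illustration of how the new base64 handling above can be exercised, the sketch below builds a JSON-RPC request whose string argument is wrapped as `str.base64`, matching the `params[0]` / `params[1]` calling convention in the script; the helper name and the chosen method are hypothetical, not part of the repository.

```python
# Hypothetical client-side sketch: wrap a string argument as str.base64 so
# replace_base64_in_jrpcobj() above decodes it before the tokenizer call.
import base64
import json

def make_jsonrpc_request(method, text, request_id=1):
    wrapped = {
        "type": "str.base64",
        "encode": base64.b64encode(text.encode("utf-8")).decode("ascii"),
    }
    return json.dumps({
        "jsonrpc": "2.0",
        "method": method,           # dispatched via getattr(tokenizer, method)
        "params": [[wrapped], {}],  # RpcMethod(*params[0], **params[1])
        "id": request_id,
    })

print(make_jsonrpc_request("encode", "What's ur name?"))
```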
57 changes: 43 additions & 14 deletions projects/llm_framework/main_llm/src/main.cpp
@@ -9,6 +9,7 @@
#include <signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <base64.h>
#include <fstream>
@@ -41,7 +42,13 @@ typedef std::function<void(const std::string &data, bool finish)> task_callback_

class llm_task {
private:
static std::atomic<unsigned int> next_port_;
std::atomic_bool tokenizer_server_flage_;
unsigned int port_;
pid_t tokenizer_pid_ = -1;

public:
enum inference_status { INFERENCE_NONE = 0, INFERENCE_RUNNING };
LLMAttrType mode_config_;
std::unique_ptr<LLM> lLaMa_;
std::string model_;
@@ -51,9 +58,8 @@ class llm_task {
task_callback_t out_callback_;
bool enoutput_;
bool enstream_;
std::atomic_bool tokenizer_server_flage_;
unsigned int port_ = 8080;
sem_t inference_semaphore;
std::atomic_int inference_status_;
std::unique_ptr<std::thread> inference_run_;
std::atomic_bool is_running_;
std::string _inference_msg;
@@ -132,6 +138,7 @@ class llm_task {
CONFIG_AUTO_SET(file_body["mode_param"], top_p);

if (mode_config_.filename_tokenizer_model.find("http:") != std::string::npos) {
mode_config_.filename_tokenizer_model = "http://localhost:" + std::to_string(port_);
std::string tokenizer_file;
if (file_exists(std::string("/opt/m5stack/scripts/") + model_ + std::string("_tokenizer.py"))) {
tokenizer_file = std::string("/opt/m5stack/scripts/") + model_ + std::string("_tokenizer.py");
@@ -146,16 +153,16 @@ class llm_task {
__log += " not found!";
SLOGE("%s", __log.c_str());
}
if (!tokenizer_server_flage_) {
pid_t pid = fork();
if (pid == 0) {
if (!tokenizer_server_flage_.load()) {
tokenizer_pid_ = fork();
if (tokenizer_pid_ == 0) {
execl("/usr/bin/python3", "python3", tokenizer_file.c_str(), "--host", "localhost", "--port",
std::to_string(port_).c_str(), "--model_id", (base_model + "tokenizer").c_str(),
"--content", ("'" + prompt_ + "'").c_str(), nullptr);
perror("execl failed");
exit(1);
}
tokenizer_server_flage_ = true;
tokenizer_server_flage_.store(true);
SLOGI("port_=%s model_id=%s content=%s", std::to_string(port_).c_str(),
(base_model + "tokenizer").c_str(), ("'" + prompt_ + "'").c_str());
std::this_thread::sleep_for(std::chrono::seconds(15));
@@ -219,23 +226,21 @@ class llm_task {
sem_wait(&inference_semaphore);
while (is_running_) {
{
sem_wait(&inference_semaphore);
inference(_inference_msg);
inference_status_--;
sem_wait(&inference_semaphore);
}
}
}

int inference_async(const std::string &msg)
{
int count = 0;
sem_getvalue(&inference_semaphore, &count);
if (count == 0) {
_inference_msg = msg;
sem_post(&inference_semaphore);
if (inference_status_ == INFERENCE_NONE) {
_inference_msg = msg;
inference_status_ = INFERENCE_RUNNING;
sem_post(&inference_semaphore);
}
return count;
return inference_status_;
}

void inference(const std::string &msg)
@@ -287,13 +292,29 @@ class llm_task {

bool delete_model()
{
if (tokenizer_pid_ != -1) {
kill(tokenizer_pid_, SIGTERM);
waitpid(tokenizer_pid_, nullptr, 0);
tokenizer_pid_ = -1;
}
lLaMa_->Deinit();
lLaMa_.reset();
return true;
}

llm_task(const std::string &workid)
static unsigned int getNextPort()
{
unsigned int port = next_port_++;
if (port > 8089) {
next_port_ = 8080;
port = 8080;
}
return port;
}

llm_task(const std::string &workid) : tokenizer_server_flage_(false), port_(getNextPort())
{
inference_status_ = INFERENCE_NONE;
sem_init(&inference_semaphore, 0, 0);
is_running_ = true;
inference_run_ = std::make_unique<std::thread>(std::bind(&llm_task::run, this));
@@ -304,12 +325,19 @@ class llm_task {
is_running_ = false;
sem_post(&inference_semaphore);
if (inference_run_) inference_run_->join();
if (tokenizer_pid_ != -1) {
kill(tokenizer_pid_, SIGTERM);
waitpid(tokenizer_pid_, nullptr, WNOHANG);
}
if (lLaMa_) {
lLaMa_->Deinit();
}
sem_destroy(&inference_semaphore);
}
};

std::atomic<unsigned int> llm_task::next_port_{8080};

#undef CONFIG_AUTO_SET

class llm_llm : public StackFlow {
@@ -619,6 +647,7 @@ class llm_llm : public StackFlow {
send("None", "None", error_body, work_id);
return -1;
}
task_pause(llm_task_[work_id_num], get_channel(work_id_num));
auto llm_channel = get_channel(work_id_num);
llm_channel->stop_subscriber("");
llm_task_[work_id_num]->lLaMa_->Stop();
10 changes: 10 additions & 0 deletions projects/llm_framework/main_llm/src/runner/Tokenizer/Tokenizer.cpp
@@ -860,6 +860,16 @@ class Tokenizer_Auto : public BaseTokenizer {
msg["content"] = message.second;
messages_list.push_back(msg);
} break;
case ROLE_TOOL: {
msg["role"] = "tool";
msg["content"] = message.second;
messages_list.push_back(msg);
} break;
case ROLE_IPYTHON: {
msg["role"] = "ipython";
msg["content"] = message.second;
messages_list.push_back(msg);
} break;
case ROLE_ASSISTANT: {
msg["role"] = "assistant";
msg["content"] = message.second;