---
license: llama3.2
language:
- en
- zh
---

# Llama-Breeze2-8B-Instruct-v0_1

【[Paper](https://arxiv.org/abs/2501.13921)】◇【[Kaggle Demo](https://www.kaggle.com/code/ycckaggle/demo-breeze-2-8b)】◇【[Collection](https://huggingface.co/collections/MediaTek-Research/llama-breeze2-67863158443a06a72dd29900)】

**The Breeze 2 Herd of Models: Traditional Chinese LLMs Based on LLaMA with Vision-Aware and Function-Calling Capabilities**

Llama Breeze 2 is a suite of advanced multi-modal language models, available in 3B and 8B parameter configurations, specifically designed to enhance Traditional Chinese language representation.
Building upon [Llama 3.2](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/), Breeze 2 is further pretrained on an extensive corpus to strengthen its command of Traditional Chinese language and culture.
It incorporates vision-aware capabilities through a visual encoder and a bridge module, and supports function calling via prompt templates and post-training on function-calling data.

*Llama 3.2 is licensed under the Llama 3.2 Community License, Copyright © Meta Platforms, Inc. All Rights Reserved.*

*We list all contributors in alphabetical order of their first names, as follows: Chan-Jan Hsu (許湛然), Chia-Sheng Liu (劉佳昇), Meng-Hsi Chen (陳孟羲), Muxi Chen (陳沐希), Po-Chun Hsu (許博竣), Yi-Chang Chen (陳宜昌), and the supervisor Da-Shan Shiu (許大山).*

## Installation

```
pip3 install transformers==4.47.0
pip3 install -U mtkresearch
```
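
For the 4-bit loading shown below, you will likely also need `accelerate` and `bitsandbytes` (e.g. `pip3 install accelerate bitsandbytes`) if they are not already present in your environment.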

```python
from transformers import AutoModel, AutoTokenizer
from transformers import GenerationConfig
import torch
from mtkresearch.llm.prompt import MRPromptV3

model_id = 'Qwe1325/Llama-Breeze2-8B-Instruct-v0_1_4bit'

# Load the 4-bit quantized checkpoint; device_map='auto' places weights automatically.
model = AutoModel.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    load_in_4bit=True,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map='auto',
    img_context_token_id=128212
).eval()

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, use_fast=False)

# With temperature and top_p this low, sampling is effectively greedy.
generation_config = GenerationConfig(
    max_new_tokens=2048,
    do_sample=True,
    temperature=0.01,
    top_p=0.01,
    repetition_penalty=1.1,
    eos_token_id=128009
)

prompt_engine = MRPromptV3()

sys_prompt = 'You are a helpful AI assistant built by MediaTek Research. The user you are helping speaks Traditional Chinese and comes from Taiwan.'

def _inference(tokenizer, model, generation_config, prompt, pixel_values=None):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    if pixel_values is None:
        output_tensors = model.generate(**inputs, generation_config=generation_config)
    else:
        output_tensors = model.generate(**inputs, generation_config=generation_config, pixel_values=pixel_values.to(model.dtype))
    output_str = tokenizer.decode(output_tensors[0])
    return output_str
```
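
Note that newer `transformers` releases deprecate the `load_in_4bit` argument in favor of an explicit quantization config. A minimal equivalent sketch (same model and arguments as above, assuming `bitsandbytes` is installed):

```python
from transformers import AutoModel, BitsAndBytesConfig
import torch

# Explicit 4-bit quantization config, the preferred style in newer releases.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModel.from_pretrained(
    model_id,
    quantization_config=quant_config,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map='auto',
    img_context_token_id=128212
).eval()
```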

## Feature: Instruction Following

```python
conversations = [
    {"role": "system", "content": sys_prompt},
    {"role": "user", "content": "請問什麼是深度學習?"},  # "What is deep learning?"
]

prompt = prompt_engine.get_prompt(conversations)
output_str = _inference(tokenizer, model, generation_config, prompt)
result = prompt_engine.parse_generated_str(output_str)
print(result)
# {'role': 'assistant', 'content': '深度學習是一種人工智慧技術,主要是透過模仿生物神經網路的結構和功能來實現。它利用大量數據進行訓練,以建立複雜的模型並使其能夠自主學習、預測或分類輸入資料。\n\n在深度學習中,通常使用多層的神經網路,每一層都包含許多相互連接的節點(稱為神經元)。這些神經元可以處理不同特徵的輸入資料,並將結果傳遞給下一層的神經元。隨著資料流向更高層次,這個過程逐漸捕捉到更抽象的概念或模式。\n\n深度學習已被廣泛應用於各種領域,如圖像識別、自然語言處理、語音識別以及遊戲等。它提供了比傳統機器學習方法更好的表現,因為它能夠從複雜且非線性的數據中提取出有用的資訊。'}
# (An assistant reply explaining deep learning in Traditional Chinese.)
```

## Feature: Visual Instruction Following

Example Image:



```python
conversations = [
    {"role": "system", "content": sys_prompt},
    {"role": "user", "content": [
        {
            "type": "image",
            "image_path": "/path/to/example-image",  # placeholder: replace with a real local image path
        },
        {
            "type": "text",
            "text": "請問前三名總共可獲得多少錢?"  # "How much money do the top three places receive in total?"
        },
    ]},
]

prompt, pixel_values = prompt_engine.get_prompt(conversations)
output_str = _inference(tokenizer, model, generation_config, prompt, pixel_values=pixel_values)
result = prompt_engine.parse_generated_str(output_str)
print(result)
# {'role': 'assistant', 'content': '第一名可獲得30萬元,第二名可獲得20萬元,第三名可獲得15萬元。前三名總共可獲得65萬元。'}
# (First place receives 300,000, second place 200,000, and third place 150,000; the top three receive 650,000 in total.)
```

## Feature: Function Calling

```python
import json

functions = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA"
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"]
                }
            },
            "required": ["location"]
        }
    }
]

# A stub standing in for a real weather lookup.
def fake_get_current_weather(location, unit=None):
    return {'temperature': 30}

mapping = {
    'get_current_weather': fake_get_current_weather
}

# stage 1: query
conversations = [
    {"role": "user", "content": "請問台北目前溫度是攝氏幾度?"},  # "What is the current temperature in Taipei, in degrees Celsius?"
]

prompt = prompt_engine.get_prompt(conversations, functions=functions)

output_str = _inference(tokenizer, model, generation_config, prompt)
result = prompt_engine.parse_generated_str(output_str)

print(result)
# {'role': 'assistant', 'tool_calls': [{'id': 'call_0bcY2wePCVTg14Q6Xor93fHz', 'type': 'function', 'function': {'name': 'get_current_weather', 'arguments': '{"location": "台北", "unit": "celsius"}'}}]}
```

```python
# stage 2: execute called functions
conversations.append(result)

tool_call = result['tool_calls'][0]
func_name = tool_call['function']['name']
func = mapping[func_name]
arguments = json.loads(tool_call['function']['arguments'])
called_result = func(**arguments)

# stage 3: put executed results
conversations.append(
    {
        'role': 'tool',
        'tool_call_id': tool_call['id'],
        'name': func_name,
        'content': json.dumps(called_result)
    }
)

prompt = prompt_engine.get_prompt(conversations, functions=functions)

output_str2 = _inference(tokenizer, model, generation_config, prompt)
result2 = prompt_engine.parse_generated_str(output_str2)
print(result2)
# {'role': 'assistant', 'content': '台北目前的溫度是攝氏30度。'}
# ("The current temperature in Taipei is 30 degrees Celsius.")
```
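
The snippet above executes only the first tool call. Whether the parser can return several calls in one turn is not shown here, but a hypothetical generalization that dispatches every returned call (reusing `mapping` and `conversations` from above) could look like:

```python
# Hypothetical: dispatch every tool call in the assistant reply.
for tool_call in result.get('tool_calls', []):
    func = mapping[tool_call['function']['name']]
    arguments = json.loads(tool_call['function']['arguments'])
    conversations.append({
        'role': 'tool',
        'tool_call_id': tool_call['id'],
        'name': tool_call['function']['name'],
        'content': json.dumps(func(**arguments)),
    })
```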

## Citation

```
@article{breeze2,
  title={The Breeze 2 Herd of Models: Traditional Chinese LLMs Based on LLaMA with Vision-Aware and Function-Calling Capabilities},
  author={Breeze Team, MediaTek Research},
  journal={arXiv},
  year={2025},
  url={https://arxiv.org/abs/2501.13921}
}
```