Update README.md
Browse files
README.md
CHANGED
@@ -20,7 +20,7 @@ tp = 1 ## change if you have multiple gpus
|
|
20 |
cache_max_entry_count = 0.2 ## how much vram is reserved for context
|
21 |
|
22 |
engine_config = TurbomindEngineConfig(model_format='awq', dtype='float16', cache_max_entry_count=cache_max_entry_count, tp=tp, quant_policy=8)
|
23 |
-
pipe = pipeline("
|
24 |
tokeniser = AutoTokenizer.from_pretrained("unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit")
|
25 |
snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").to('cuda:0')
|
26 |
```
|
|
|
20 |
cache_max_entry_count = 0.2 ## how much vram is reserved for context
|
21 |
|
22 |
engine_config = TurbomindEngineConfig(model_format='awq', dtype='float16', cache_max_entry_count=cache_max_entry_count, tp=tp, quant_policy=8)
|
23 |
+
pipe = pipeline("YaTharThShaRma999/orpheus_awq", backend_config=engine_config)
|
24 |
tokeniser = AutoTokenizer.from_pretrained("unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit")
|
25 |
snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").to('cuda:0')
|
26 |
```
|