Add fine-tuned DistilBERT model and card
This view is limited to 50 files because it contains too many changes.
- README.md +128 -0
- all_results.json +16 -0
- checkpoint-1010/config.json +73 -0
- checkpoint-1010/model.safetensors +3 -0
- checkpoint-1010/optimizer.pt +3 -0
- checkpoint-1010/rng_state.pth +3 -0
- checkpoint-1010/scaler.pt +3 -0
- checkpoint-1010/scheduler.pt +3 -0
- checkpoint-1010/special_tokens_map.json +7 -0
- checkpoint-1010/tokenizer.json +0 -0
- checkpoint-1010/tokenizer_config.json +56 -0
- checkpoint-1010/trainer_state.json +224 -0
- checkpoint-1010/training_args.bin +3 -0
- checkpoint-1010/vocab.txt +0 -0
- checkpoint-808/config.json +73 -0
- checkpoint-808/model.safetensors +3 -0
- checkpoint-808/optimizer.pt +3 -0
- checkpoint-808/rng_state.pth +3 -0
- checkpoint-808/scaler.pt +3 -0
- checkpoint-808/scheduler.pt +3 -0
- checkpoint-808/special_tokens_map.json +7 -0
- checkpoint-808/tokenizer.json +0 -0
- checkpoint-808/tokenizer_config.json +56 -0
- checkpoint-808/trainer_state.json +186 -0
- checkpoint-808/training_args.bin +3 -0
- checkpoint-808/vocab.txt +0 -0
- config.json +73 -0
- model.safetensors +3 -0
- runs/Apr15_15-12-43_aigodmode/events.out.tfevents.1744755163.aigodmode.996209.0 +3 -0
- runs/Apr15_15-13-03_aigodmode/events.out.tfevents.1744755184.aigodmode.996626.0 +3 -0
- runs/Apr15_15-13-03_aigodmode/events.out.tfevents.1744755187.aigodmode.996626.1 +3 -0
- runs/Apr15_17-19-20_aigodmode/events.out.tfevents.1744762761.aigodmode.1115972.0 +3 -0
- runs/Apr15_17-19-20_aigodmode/events.out.tfevents.1744762765.aigodmode.1115972.1 +3 -0
- runs/Apr15_18-20-00_aigodmode/events.out.tfevents.1744766401.aigodmode.1175888.0 +3 -0
- runs/Apr15_18-20-00_aigodmode/events.out.tfevents.1744766405.aigodmode.1175888.1 +3 -0
- runs/Apr15_18-41-07_aigodmode/events.out.tfevents.1744767667.aigodmode.1200966.0 +3 -0
- runs/Apr15_18-41-07_aigodmode/events.out.tfevents.1744767675.aigodmode.1200966.1 +3 -0
- runs/Apr15_19-39-18_aigodmode/events.out.tfevents.1744771158.aigodmode.1261440.0 +3 -0
- runs/Apr15_19-39-18_aigodmode/events.out.tfevents.1744771170.aigodmode.1261440.1 +3 -0
- runs/Apr15_19-41-09_aigodmode/events.out.tfevents.1744771270.aigodmode.1263282.0 +3 -0
- runs/Apr15_19-41-09_aigodmode/events.out.tfevents.1744771274.aigodmode.1263282.1 +3 -0
- runs/Apr15_20-08-54_aigodmode/events.out.tfevents.1744772934.aigodmode.1292055.0 +3 -0
- runs/Apr15_20-08-54_aigodmode/events.out.tfevents.1744772939.aigodmode.1292055.1 +3 -0
- runs/Apr15_22-03-10_aigodmode/events.out.tfevents.1744779790.aigodmode.1411307.0 +3 -0
- runs/Apr15_22-03-10_aigodmode/events.out.tfevents.1744779797.aigodmode.1411307.1 +3 -0
- runs/Apr15_23-20-35_aigodmode/events.out.tfevents.1744784435.aigodmode.1497045.0 +3 -0
- runs/Apr15_23-20-35_aigodmode/events.out.tfevents.1744784442.aigodmode.1497045.1 +3 -0
- runs/Apr15_23-21-41_aigodmode/events.out.tfevents.1744784502.aigodmode.1498147.0 +3 -0
- runs/Apr15_23-21-41_aigodmode/events.out.tfevents.1744784513.aigodmode.1498147.1 +3 -0
- runs/Apr15_23-22-43_aigodmode/events.out.tfevents.1744784564.aigodmode.1499184.0 +3 -0
README.md
ADDED
@@ -0,0 +1,128 @@
---
language: en
license: apache-2.0
library_name: transformers
tags:
- distilbert
- text-classification
- token-classification
- intent-classification
- slot-filling
- joint-intent-slot
- smart-home
- generated:Enfuse.io
pipeline_tag: token-classification # Or text-classification, often token is used for joint models
model-index:
- name: distilbert-joint-intent-slot-smarthome
  results:
  - task:
      type: token-classification # Represents the slot filling part
      name: Slot Filling
    dataset:
      name: enfuse/joint-intent-slot-smarthome
      type: enfuse/joint-intent-slot-smarthome
      config: default
      split: test
    metrics:
    - type: micro_f1
      value: 0.8800
      name: Slot F1 (Micro)
    - type: precision
      value: 0.8800
      name: Slot Precision (Micro)
    - type: recall
      value: 0.8800
      name: Slot Recall (Micro)
  - task:
      type: text-classification # Represents the intent part
      name: Intent Classification
    dataset:
      name: enfuse/joint-intent-slot-smarthome
      type: enfuse/joint-intent-slot-smarthome
      config: default
      split: test
    metrics:
    - type: accuracy
      value: 0.9208
      name: Intent Accuracy
---

# DistilBERT for Smart Home Joint Intent Classification and Slot Filling

## Model Description

**Produced By:** [Enfuse.io](https://enfuse.io/)

This model is a fine-tuned version of `distilbert-base-uncased` specifically adapted for **joint intent classification and slot filling** in the **smart home domain**. Given a user command related to controlling smart home devices (like lights or thermostats), the model simultaneously predicts:

1. The user's **intent** (e.g., `set_device_state`, `get_device_state`).
2. The relevant **slots** (entities like `device_name`, `location`, `state`, `attribute_value`) within the command, using BIO tagging.
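
For illustration, a hypothetical command annotated in this style is shown below; the utterance, tokens, and tags are invented for this example and are not taken from the dataset.

```python
# Hypothetical example (not from the dataset) showing the two joint prediction targets.
example = {
    "text": "turn off the kitchen lights",
    "intent": "set_device_state",                    # sequence-level intent label
    "tokens": ["turn", "off", "the", "kitchen", "lights"],
    "slot_tags": ["O", "B-state", "O", "B-location", "B-device_name"],  # word-level BIO tags
}
```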

## Intended Use and Limitations

**Primary Intended Use:** This model is intended for **hobbyist experimentation and educational purposes** related to Natural Language Understanding (NLU) for smart home applications. It can be used as a baseline or starting point for understanding how to build NLU components for simple device control.

**Disclaimer:** **This model is NOT intended for use in production environments.** Enfuse.io takes **no responsibility** for the performance, reliability, security, or any consequences arising from the use of this model in production systems or safety-critical applications. Use in such contexts is entirely at the user's own risk.

**Out-of-Scope Use:**
* The model is not designed for general conversation or tasks outside the specific smart home intents and slots it was trained on.
* It has **no built-in mechanism for handling out-of-domain requests** (e.g., asking about weather, playing music). It will likely attempt to classify such requests into one of the known smart home intents, potentially leading to incorrect behavior.
* It has not been evaluated for fairness, bias, or robustness against adversarial inputs.

## Training Data

The model was fine-tuned on the `enfuse/joint-intent-slot-smarthome` dataset, specifically the `generated_smarthome_2016_unique.jsonl` version containing 2016 unique synthetic examples.

This dataset was generated by Enfuse.io using a combination of `mistralai/Mistral-7B-Instruct-v0.1` and `openai/gpt-4o`, followed by validation and de-duplication. Please refer to the [dataset card](https://huggingface.co/datasets/enfuse/joint-intent-slot-smarthome) for more details on the data generation process and limitations.

## Training Procedure

### Preprocessing

The text was tokenized using the `distilbert-base-uncased` tokenizer. Slot labels were converted to a BIO tagging scheme. Input sequences were padded and truncated to a maximum length of 128 tokens.
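
The exact preprocessing script is not part of this commit, so the snippet below is only a sketch of the usual sub-token alignment step: word-level BIO tags are attached to the first sub-token of each word, and special/padding tokens receive the ignore index `-100`. Whether the original run used this first-sub-token convention or labeled every sub-token is an assumption.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Hypothetical word-level example; slot_label2id is a subset of the map in config.json.
words = ["turn", "off", "the", "kitchen", "lights"]
word_tags = ["O", "B-state", "O", "B-location", "B-device_name"]
slot_label2id = {"O": 0, "B-device_name": 5, "B-location": 7, "B-state": 9}

enc = tokenizer(words, is_split_into_words=True,
                truncation=True, max_length=128, padding="max_length")

labels, prev = [], None
for word_idx in enc.word_ids():
    if word_idx is None:
        labels.append(-100)                                # [CLS]/[SEP]/[PAD]: ignored by the loss
    elif word_idx != prev:
        labels.append(slot_label2id[word_tags[word_idx]])  # first sub-token carries the word's tag
    else:
        labels.append(-100)                                # continuation sub-tokens ignored here
    prev = word_idx
enc["labels"] = labels
```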

### Fine-tuning

The model was fine-tuned using the Hugging Face `transformers` library `Trainer` on a single NVIDIA RTX 5090.

* **Epochs:** 10
* **Batch Size:** 16 (per device)
* **Learning Rate:** 5e-5 (with linear decay)
* **Optimizer:** AdamW
* **Precision:** FP16
* **Dataset Split:** 80% train (1612), 10% validation (202), 10% test (202)
* **Best Model Selection:** The checkpoint with the highest `eval_intent_accuracy` on the validation set was selected as the final model (epoch 8, step 808, in this 10-epoch run; see `trainer_state.json`).
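
The training script itself is not included in this commit; the following `TrainingArguments` sketch only mirrors the settings listed above. The output directory and logging interval match `trainer_state.json`, while the evaluation batch size and any unlisted arguments are unknown and omitted.

```python
from transformers import TrainingArguments

# Approximate reconstruction of the listed hyperparameters; treat this as a sketch,
# not the exact arguments used to produce this checkpoint.
training_args = TrainingArguments(
    output_dir="./results_distilbert_custom",     # matches best_model_checkpoint in trainer_state.json
    num_train_epochs=10,
    per_device_train_batch_size=16,
    learning_rate=5e-5,
    lr_scheduler_type="linear",
    optim="adamw_torch",
    fp16=True,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_intent_accuracy",
    logging_steps=100,                            # matches trainer_state.json
)
```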

## Evaluation Results

The following results were achieved on the **test set** (202 examples) using the best checkpoint saved during training:

* **Intent Accuracy:** 92.08%
* **Slot F1 Score (Micro):** 88.00%
* **Slot Precision (Micro):** 88.00%
* **Slot Recall (Micro):** 88.00%

*(Note: These results are specific to this particular training setup and may vary with different hyperparameters or training runs.)*

## How to Use

A hedged usage sketch follows; the full inference logic (including mapping slot predictions back to words) follows the same approach as `infer.py` from the training code.
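
`config.json` names a custom architecture, `DistilBertForJointIntentSlotFilling`, which is not a class in the `transformers` library, so `pipeline()` or `AutoModel` will not reproduce the joint heads out of the box; the class definition from the training code has to be importable. In the sketch below, the module name `modeling_joint`, the repo id, and the output attribute names `intent_logits`/`slot_logits` are all assumptions.

```python
import torch
from transformers import AutoConfig, AutoTokenizer

# DistilBertForJointIntentSlotFilling is the custom class named in config.json;
# it must come from the training code (hypothetical module name below).
from modeling_joint import DistilBertForJointIntentSlotFilling

model_id = "enfuse/distilbert-joint-intent-slot-smarthome"   # assumed repo id (or a local path)
tokenizer = AutoTokenizer.from_pretrained(model_id)
config = AutoConfig.from_pretrained(model_id)
model = DistilBertForJointIntentSlotFilling.from_pretrained(model_id).eval()

text = "turn off the kitchen lights"                         # hypothetical command
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)

with torch.no_grad():
    outputs = model(**inputs)

intent_id = outputs.intent_logits.argmax(-1).item()          # assumed output attribute
print("intent:", config.id2intent_label[str(intent_id)])

slot_ids = outputs.slot_logits.argmax(-1)[0].tolist()        # assumed output attribute
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
for token, slot_id in zip(tokens, slot_ids):
    if token not in tokenizer.all_special_tokens:
        print(token, config.id2slot_label[str(slot_id)])
```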

## Model Card Contact

[Enfuse.io](https://enfuse.io/)

## Citation

If you use this model, please cite the dataset:

```bibtex
@misc{enfuse_smarthome_intent_slot_2024,
  author       = {Enfuse.io},
  title        = {Enfuse Smart Home Joint Intent and Slot Filling Dataset},
  year         = {2024},
  publisher    = {Hugging Face},
  journal      = {Hugging Face Hub},
  howpublished = {\url{https://huggingface.co/datasets/enfuse/joint-intent-slot-smarthome}}
}
```
all_results.json
ADDED
@@ -0,0 +1,16 @@
{
  "epoch": 10.0,
  "eval_intent_accuracy": 0.9207920792079208,
  "eval_loss": 0.6127998232841492,
  "eval_runtime": 0.0518,
  "eval_samples_per_second": 3900.241,
  "eval_slot_f1": 0.880019120458891,
  "eval_slot_precision": 0.880019120458891,
  "eval_slot_recall": 0.880019120458891,
  "eval_steps_per_second": 77.233,
  "total_flos": 526646237245440.0,
  "train_loss": 0.5116730279261523,
  "train_runtime": 22.7338,
  "train_samples_per_second": 709.076,
  "train_steps_per_second": 44.427
}
checkpoint-1010/config.json
ADDED
@@ -0,0 +1,73 @@
{
  "activation": "gelu",
  "architectures": [
    "DistilBertForJointIntentSlotFilling"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2intent_label": {
    "0": "get_device_state",
    "1": "set_device_attribute",
    "2": "set_device_state"
  },
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "id2slot_label": {
    "0": "O",
    "1": "B-attribute_type",
    "2": "I-attribute_type",
    "3": "B-attribute_value",
    "4": "I-attribute_value",
    "5": "B-device_name",
    "6": "I-device_name",
    "7": "B-location",
    "8": "I-location",
    "9": "B-state",
    "10": "I-state"
  },
  "initializer_range": 0.02,
  "intent_label2id": {
    "get_device_state": 0,
    "set_device_attribute": 1,
    "set_device_state": 2
  },
  "intent_loss_coef": 1.0,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "num_intent_labels": 3,
  "num_slot_labels": 11,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "slot_label2id": {
    "B-attribute_type": 1,
    "B-attribute_value": 3,
    "B-device_name": 5,
    "B-location": 7,
    "B-state": 9,
    "I-attribute_type": 2,
    "I-attribute_value": 4,
    "I-device_name": 6,
    "I-location": 8,
    "I-state": 10,
    "O": 0
  },
  "slot_loss_coef": 1.0,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.51.3",
  "vocab_size": 30522
}
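
The configuration above carries the task-specific pieces of this model as extra keys: the intent and slot label maps (`id2intent_label`, `id2slot_label` and their inverses) plus the loss coefficients. A small sketch of inspecting them without loading any weights is shown below; the repo id is an assumption, and a local path to this checkpoint works the same way.

```python
from transformers import AutoConfig

# Extra keys in config.json surface as plain attributes on the loaded config object.
config = AutoConfig.from_pretrained("enfuse/distilbert-joint-intent-slot-smarthome")  # assumed id

print(config.num_intent_labels)   # 3
print(config.id2intent_label)     # {"0": "get_device_state", ...}
print(config.num_slot_labels)     # 11
print(config.id2slot_label)       # {"0": "O", "1": "B-attribute_type", ...}
print(config.intent_loss_coef, config.slot_loss_coef)
```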
checkpoint-1010/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:93648248aa5f5b2bba475dd45e39523e87d8d476b2b0963e788330c821658952
size 265507144
checkpoint-1010/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:21b31a3bf879b8944c1933858eed98b7d0be7bacac83c83f16e99621ad542aa8
size 531076747
checkpoint-1010/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2d82205c82ca0e59cbd17794777700714eabf1c99241e9d6f60a8374a27c5982
size 14645
checkpoint-1010/scaler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff4ab2e42d3da0b44d79c51f9ee188d20a00cf98ef55caa853236c82352c6032
size 1383
checkpoint-1010/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c670f9ce3c4ad30a56141825dd15a2fa3016bb890950ac61a4a9a65bdbf58199
size 1465
checkpoint-1010/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
checkpoint-1010/tokenizer.json
ADDED
The diff for this file is too large to render.
checkpoint-1010/tokenizer_config.json
ADDED
@@ -0,0 +1,56 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": false,
  "cls_token": "[CLS]",
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "DistilBertTokenizer",
  "unk_token": "[UNK]"
}
checkpoint-1010/trainer_state.json
ADDED
@@ -0,0 +1,224 @@
{
  "best_global_step": 808,
  "best_metric": 0.9108910891089109,
  "best_model_checkpoint": "./results_distilbert_custom/checkpoint-808",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 1010,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9900990099009901,
      "grad_norm": 2.6412200927734375,
      "learning_rate": 4.51980198019802e-05,
      "loss": 1.0179,
      "step": 100
    },
    {
      "epoch": 1.0,
      "eval_intent_accuracy": 0.900990099009901,
      "eval_loss": 0.6627025008201599,
      "eval_runtime": 0.0532,
      "eval_samples_per_second": 3796.158,
      "eval_slot_f1": 0.8583372039015328,
      "eval_slot_precision": 0.8583372039015328,
      "eval_slot_recall": 0.8583372039015328,
      "eval_steps_per_second": 75.171,
      "step": 101
    },
    {
      "epoch": 1.9801980198019802,
      "grad_norm": 1.697471022605896,
      "learning_rate": 4.0247524752475254e-05,
      "loss": 0.5933,
      "step": 200
    },
    {
      "epoch": 2.0,
      "eval_intent_accuracy": 0.8910891089108911,
      "eval_loss": 0.5899206399917603,
      "eval_runtime": 0.0516,
      "eval_samples_per_second": 3912.995,
      "eval_slot_f1": 0.8471899674872271,
      "eval_slot_precision": 0.8471899674872271,
      "eval_slot_recall": 0.8471899674872271,
      "eval_steps_per_second": 77.485,
      "step": 202
    },
    {
      "epoch": 2.9702970297029703,
      "grad_norm": 3.684925079345703,
      "learning_rate": 3.52970297029703e-05,
      "loss": 0.5248,
      "step": 300
    },
    {
      "epoch": 3.0,
      "eval_intent_accuracy": 0.900990099009901,
      "eval_loss": 0.5877912640571594,
      "eval_runtime": 0.0512,
      "eval_samples_per_second": 3945.962,
      "eval_slot_f1": 0.864375290292615,
      "eval_slot_precision": 0.864375290292615,
      "eval_slot_recall": 0.864375290292615,
      "eval_steps_per_second": 78.138,
      "step": 303
    },
    {
      "epoch": 3.9603960396039604,
      "grad_norm": 7.954131126403809,
      "learning_rate": 3.0346534653465347e-05,
      "loss": 0.489,
      "step": 400
    },
    {
      "epoch": 4.0,
      "eval_intent_accuracy": 0.905940594059406,
      "eval_loss": 0.6087009906768799,
      "eval_runtime": 0.0514,
      "eval_samples_per_second": 3928.963,
      "eval_slot_f1": 0.864375290292615,
      "eval_slot_precision": 0.864375290292615,
      "eval_slot_recall": 0.864375290292615,
      "eval_steps_per_second": 77.801,
      "step": 404
    },
    {
      "epoch": 4.9504950495049505,
      "grad_norm": 5.173593044281006,
      "learning_rate": 2.53960396039604e-05,
      "loss": 0.468,
      "step": 500
    },
    {
      "epoch": 5.0,
      "eval_intent_accuracy": 0.8762376237623762,
      "eval_loss": 0.6051058173179626,
      "eval_runtime": 0.052,
      "eval_samples_per_second": 3887.446,
      "eval_slot_f1": 0.8732001857872735,
      "eval_slot_precision": 0.8732001857872735,
      "eval_slot_recall": 0.8732001857872735,
      "eval_steps_per_second": 76.979,
      "step": 505
    },
    {
      "epoch": 5.9405940594059405,
      "grad_norm": 2.602719306945801,
      "learning_rate": 2.0445544554455444e-05,
      "loss": 0.4491,
      "step": 600
    },
    {
      "epoch": 6.0,
      "eval_intent_accuracy": 0.905940594059406,
      "eval_loss": 0.6407473683357239,
      "eval_runtime": 0.0554,
      "eval_samples_per_second": 3647.88,
      "eval_slot_f1": 0.87598699489085,
      "eval_slot_precision": 0.87598699489085,
      "eval_slot_recall": 0.87598699489085,
      "eval_steps_per_second": 72.235,
      "step": 606
    },
    {
      "epoch": 6.930693069306931,
      "grad_norm": 2.991025924682617,
      "learning_rate": 1.5495049504950496e-05,
      "loss": 0.4204,
      "step": 700
    },
    {
      "epoch": 7.0,
      "eval_intent_accuracy": 0.8910891089108911,
      "eval_loss": 0.6234941482543945,
      "eval_runtime": 0.0568,
      "eval_samples_per_second": 3558.795,
      "eval_slot_f1": 0.8773803994426381,
      "eval_slot_precision": 0.8773803994426381,
      "eval_slot_recall": 0.8773803994426381,
      "eval_steps_per_second": 70.471,
      "step": 707
    },
    {
      "epoch": 7.920792079207921,
      "grad_norm": 2.211575746536255,
      "learning_rate": 1.0544554455445545e-05,
      "loss": 0.4028,
      "step": 800
    },
    {
      "epoch": 8.0,
      "eval_intent_accuracy": 0.9108910891089109,
      "eval_loss": 0.65041583776474,
      "eval_runtime": 0.0519,
      "eval_samples_per_second": 3893.86,
      "eval_slot_f1": 0.878309335810497,
      "eval_slot_precision": 0.878309335810497,
      "eval_slot_recall": 0.878309335810497,
      "eval_steps_per_second": 77.106,
      "step": 808
    },
    {
      "epoch": 8.910891089108912,
      "grad_norm": 1.5847089290618896,
      "learning_rate": 5.594059405940594e-06,
      "loss": 0.3929,
      "step": 900
    },
    {
      "epoch": 9.0,
      "eval_intent_accuracy": 0.900990099009901,
      "eval_loss": 0.6379386782646179,
      "eval_runtime": 0.051,
      "eval_samples_per_second": 3959.979,
      "eval_slot_f1": 0.8769159312587088,
      "eval_slot_precision": 0.8769159312587088,
      "eval_slot_recall": 0.8769159312587088,
      "eval_steps_per_second": 78.415,
      "step": 909
    },
    {
      "epoch": 9.900990099009901,
      "grad_norm": 5.904131889343262,
      "learning_rate": 6.435643564356436e-07,
      "loss": 0.3679,
      "step": 1000
    },
    {
      "epoch": 10.0,
      "eval_intent_accuracy": 0.9108910891089109,
      "eval_loss": 0.6317320466041565,
      "eval_runtime": 0.0518,
      "eval_samples_per_second": 3899.99,
      "eval_slot_f1": 0.8797027403622851,
      "eval_slot_precision": 0.8797027403622851,
      "eval_slot_recall": 0.8797027403622851,
      "eval_steps_per_second": 77.228,
      "step": 1010
    }
  ],
  "logging_steps": 100,
  "max_steps": 1010,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 526646237245440.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
checkpoint-1010/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5460ae7fb569e4afe080919a3302f4475b1c884a06713458cae27af7d9e6a9cf
size 5777
checkpoint-1010/vocab.txt
ADDED
The diff for this file is too large to render.
checkpoint-808/config.json
ADDED
@@ -0,0 +1,73 @@
{
  "activation": "gelu",
  "architectures": [
    "DistilBertForJointIntentSlotFilling"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2intent_label": {
    "0": "get_device_state",
    "1": "set_device_attribute",
    "2": "set_device_state"
  },
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "id2slot_label": {
    "0": "O",
    "1": "B-attribute_type",
    "2": "I-attribute_type",
    "3": "B-attribute_value",
    "4": "I-attribute_value",
    "5": "B-device_name",
    "6": "I-device_name",
    "7": "B-location",
    "8": "I-location",
    "9": "B-state",
    "10": "I-state"
  },
  "initializer_range": 0.02,
  "intent_label2id": {
    "get_device_state": 0,
    "set_device_attribute": 1,
    "set_device_state": 2
  },
  "intent_loss_coef": 1.0,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "num_intent_labels": 3,
  "num_slot_labels": 11,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "slot_label2id": {
    "B-attribute_type": 1,
    "B-attribute_value": 3,
    "B-device_name": 5,
    "B-location": 7,
    "B-state": 9,
    "I-attribute_type": 2,
    "I-attribute_value": 4,
    "I-device_name": 6,
    "I-location": 8,
    "I-state": 10,
    "O": 0
  },
  "slot_loss_coef": 1.0,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.51.3",
  "vocab_size": 30522
}
checkpoint-808/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0c31f1badc5c6e7b3bc137d8f9e31d51672a8f47fe10c383ffc5a60cff77e961
size 265507144
checkpoint-808/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6d01715dea30e8e3316263b4112055a23d620f3992a717339486141fe5802689
size 531076747
checkpoint-808/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b1d3c6c5729df1c06893524eebca8d445a73ee949ab0f5b8968bfde1fff2dd5e
size 14645
checkpoint-808/scaler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c2309eb914966d75cf8e4e0e7271c74191a61b3d57d0e33e440d8ca2990543d8
size 1383
checkpoint-808/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e624932d18ec9c281888584c8ffdcc866ed10ee6365627cd55003a9eeb6e9fc7
size 1465
checkpoint-808/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
checkpoint-808/tokenizer.json
ADDED
The diff for this file is too large to render.
checkpoint-808/tokenizer_config.json
ADDED
@@ -0,0 +1,56 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": false,
  "cls_token": "[CLS]",
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "DistilBertTokenizer",
  "unk_token": "[UNK]"
}
checkpoint-808/trainer_state.json
ADDED
@@ -0,0 +1,186 @@
{
  "best_global_step": 808,
  "best_metric": 0.9108910891089109,
  "best_model_checkpoint": "./results_distilbert_custom/checkpoint-808",
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 808,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9900990099009901,
      "grad_norm": 2.6412200927734375,
      "learning_rate": 4.51980198019802e-05,
      "loss": 1.0179,
      "step": 100
    },
    {
      "epoch": 1.0,
      "eval_intent_accuracy": 0.900990099009901,
      "eval_loss": 0.6627025008201599,
      "eval_runtime": 0.0532,
      "eval_samples_per_second": 3796.158,
      "eval_slot_f1": 0.8583372039015328,
      "eval_slot_precision": 0.8583372039015328,
      "eval_slot_recall": 0.8583372039015328,
      "eval_steps_per_second": 75.171,
      "step": 101
    },
    {
      "epoch": 1.9801980198019802,
      "grad_norm": 1.697471022605896,
      "learning_rate": 4.0247524752475254e-05,
      "loss": 0.5933,
      "step": 200
    },
    {
      "epoch": 2.0,
      "eval_intent_accuracy": 0.8910891089108911,
      "eval_loss": 0.5899206399917603,
      "eval_runtime": 0.0516,
      "eval_samples_per_second": 3912.995,
      "eval_slot_f1": 0.8471899674872271,
      "eval_slot_precision": 0.8471899674872271,
      "eval_slot_recall": 0.8471899674872271,
      "eval_steps_per_second": 77.485,
      "step": 202
    },
    {
      "epoch": 2.9702970297029703,
      "grad_norm": 3.684925079345703,
      "learning_rate": 3.52970297029703e-05,
      "loss": 0.5248,
      "step": 300
    },
    {
      "epoch": 3.0,
      "eval_intent_accuracy": 0.900990099009901,
      "eval_loss": 0.5877912640571594,
      "eval_runtime": 0.0512,
      "eval_samples_per_second": 3945.962,
      "eval_slot_f1": 0.864375290292615,
      "eval_slot_precision": 0.864375290292615,
      "eval_slot_recall": 0.864375290292615,
      "eval_steps_per_second": 78.138,
      "step": 303
    },
    {
      "epoch": 3.9603960396039604,
      "grad_norm": 7.954131126403809,
      "learning_rate": 3.0346534653465347e-05,
      "loss": 0.489,
      "step": 400
    },
    {
      "epoch": 4.0,
      "eval_intent_accuracy": 0.905940594059406,
      "eval_loss": 0.6087009906768799,
      "eval_runtime": 0.0514,
      "eval_samples_per_second": 3928.963,
      "eval_slot_f1": 0.864375290292615,
      "eval_slot_precision": 0.864375290292615,
      "eval_slot_recall": 0.864375290292615,
      "eval_steps_per_second": 77.801,
      "step": 404
    },
    {
      "epoch": 4.9504950495049505,
      "grad_norm": 5.173593044281006,
      "learning_rate": 2.53960396039604e-05,
      "loss": 0.468,
      "step": 500
    },
    {
      "epoch": 5.0,
      "eval_intent_accuracy": 0.8762376237623762,
      "eval_loss": 0.6051058173179626,
      "eval_runtime": 0.052,
      "eval_samples_per_second": 3887.446,
      "eval_slot_f1": 0.8732001857872735,
      "eval_slot_precision": 0.8732001857872735,
      "eval_slot_recall": 0.8732001857872735,
      "eval_steps_per_second": 76.979,
      "step": 505
    },
    {
      "epoch": 5.9405940594059405,
      "grad_norm": 2.602719306945801,
      "learning_rate": 2.0445544554455444e-05,
      "loss": 0.4491,
      "step": 600
    },
    {
      "epoch": 6.0,
      "eval_intent_accuracy": 0.905940594059406,
      "eval_loss": 0.6407473683357239,
      "eval_runtime": 0.0554,
      "eval_samples_per_second": 3647.88,
      "eval_slot_f1": 0.87598699489085,
      "eval_slot_precision": 0.87598699489085,
      "eval_slot_recall": 0.87598699489085,
      "eval_steps_per_second": 72.235,
      "step": 606
    },
    {
      "epoch": 6.930693069306931,
      "grad_norm": 2.991025924682617,
      "learning_rate": 1.5495049504950496e-05,
      "loss": 0.4204,
      "step": 700
    },
    {
      "epoch": 7.0,
      "eval_intent_accuracy": 0.8910891089108911,
      "eval_loss": 0.6234941482543945,
      "eval_runtime": 0.0568,
      "eval_samples_per_second": 3558.795,
      "eval_slot_f1": 0.8773803994426381,
      "eval_slot_precision": 0.8773803994426381,
      "eval_slot_recall": 0.8773803994426381,
      "eval_steps_per_second": 70.471,
      "step": 707
    },
    {
      "epoch": 7.920792079207921,
      "grad_norm": 2.211575746536255,
      "learning_rate": 1.0544554455445545e-05,
      "loss": 0.4028,
      "step": 800
    },
    {
      "epoch": 8.0,
      "eval_intent_accuracy": 0.9108910891089109,
      "eval_loss": 0.65041583776474,
      "eval_runtime": 0.0519,
      "eval_samples_per_second": 3893.86,
      "eval_slot_f1": 0.878309335810497,
      "eval_slot_precision": 0.878309335810497,
      "eval_slot_recall": 0.878309335810497,
      "eval_steps_per_second": 77.106,
      "step": 808
    }
  ],
  "logging_steps": 100,
  "max_steps": 1010,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 421316989796352.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
checkpoint-808/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5460ae7fb569e4afe080919a3302f4475b1c884a06713458cae27af7d9e6a9cf
size 5777
checkpoint-808/vocab.txt
ADDED
The diff for this file is too large to render.
config.json
ADDED
@@ -0,0 +1,73 @@
{
  "activation": "gelu",
  "architectures": [
    "DistilBertForJointIntentSlotFilling"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2intent_label": {
    "0": "get_device_state",
    "1": "set_device_attribute",
    "2": "set_device_state"
  },
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "id2slot_label": {
    "0": "O",
    "1": "B-attribute_type",
    "2": "I-attribute_type",
    "3": "B-attribute_value",
    "4": "I-attribute_value",
    "5": "B-device_name",
    "6": "I-device_name",
    "7": "B-location",
    "8": "I-location",
    "9": "B-state",
    "10": "I-state"
  },
  "initializer_range": 0.02,
  "intent_label2id": {
    "get_device_state": 0,
    "set_device_attribute": 1,
    "set_device_state": 2
  },
  "intent_loss_coef": 1.0,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "num_intent_labels": 3,
  "num_slot_labels": 11,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "slot_label2id": {
    "B-attribute_type": 1,
    "B-attribute_value": 3,
    "B-device_name": 5,
    "B-location": 7,
    "B-state": 9,
    "I-attribute_type": 2,
    "I-attribute_value": 4,
    "I-device_name": 6,
    "I-location": 8,
    "I-state": 10,
    "O": 0
  },
  "slot_loss_coef": 1.0,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.51.3",
  "vocab_size": 30522
}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0c31f1badc5c6e7b3bc137d8f9e31d51672a8f47fe10c383ffc5a60cff77e961
size 265507144
runs/Apr15_15-12-43_aigodmode/events.out.tfevents.1744755163.aigodmode.996209.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f35b0c3307c8edce30c0e71cb9c3ead35b3fe25a0332f9781be0a689438b6182
size 5938
runs/Apr15_15-13-03_aigodmode/events.out.tfevents.1744755184.aigodmode.996626.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a7c22901362b8d9f274dce0f236ea66c22cac503e44622e32bb092e7b73bb8ca
size 7740
runs/Apr15_15-13-03_aigodmode/events.out.tfevents.1744755187.aigodmode.996626.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff2e00c56c39064e4852c9e87175f4bbd0bc89dc24ed3606106eb3b0d4c21ae1
size 573
runs/Apr15_17-19-20_aigodmode/events.out.tfevents.1744762761.aigodmode.1115972.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a50df85af1b5d0a3f06e567d52910b8be7c9f801e431725a3b5d01710f55082e
size 7962
runs/Apr15_17-19-20_aigodmode/events.out.tfevents.1744762765.aigodmode.1115972.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:399ecf3dc37daf2dd0923162f5c0e3147dd7bc437b82162a14643109544776f3
size 582
runs/Apr15_18-20-00_aigodmode/events.out.tfevents.1744766401.aigodmode.1175888.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2ce186316ae6eb1fc9acdb21cbdf157926cb80e74093de0f1194d7745f855c07
size 7962
runs/Apr15_18-20-00_aigodmode/events.out.tfevents.1744766405.aigodmode.1175888.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8503cfe44841a0fb1e6e276c964e55d9584a406d86d89e646bbc9026d4eec916
size 582
runs/Apr15_18-41-07_aigodmode/events.out.tfevents.1744767667.aigodmode.1200966.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b3f5508d3cac29164527cf1bfda59632f225f685b58a503c84e2c384ba15143c
size 8393
runs/Apr15_18-41-07_aigodmode/events.out.tfevents.1744767675.aigodmode.1200966.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c4af9c5c60777f79dac5ec4bdf01168be4fa51925c7e771776a460d2fa17397f
size 582
runs/Apr15_19-39-18_aigodmode/events.out.tfevents.1744771158.aigodmode.1261440.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:caf00a06717da11a4159c820fe56c91fe07a278da445f5cfe8e173ea9aac7aea
size 9035
runs/Apr15_19-39-18_aigodmode/events.out.tfevents.1744771170.aigodmode.1261440.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2a131f7bf860686603c5a621154634d8b823293703fb9c4d5e9a300de23e67e9
size 582
runs/Apr15_19-41-09_aigodmode/events.out.tfevents.1744771270.aigodmode.1263282.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:43492f9f7312018fa2d91feda6fdfc71d54df79c61ba9c71722b4ccb0a786cc0
size 7962
runs/Apr15_19-41-09_aigodmode/events.out.tfevents.1744771274.aigodmode.1263282.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf0328a70b832431662157df678c2669601f975d5c7640d0f41ceedd753470e9
size 582
runs/Apr15_20-08-54_aigodmode/events.out.tfevents.1744772934.aigodmode.1292055.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:87b67a926bafbf632bccd8d5b8f9fa3decfcd16dd13b0b0c1cc19bc9182f3272
size 7962
runs/Apr15_20-08-54_aigodmode/events.out.tfevents.1744772939.aigodmode.1292055.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:28519c868715aca438a32fa2a9607390d9c12ed964f671b2e2818f1e3a3cc7b1
size 582
runs/Apr15_22-03-10_aigodmode/events.out.tfevents.1744779790.aigodmode.1411307.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bc6baddd8e8f1fe4a3177de09a0de4b5d7c051eda63b0d572d0ecc14b73fc3c6
size 8182
runs/Apr15_22-03-10_aigodmode/events.out.tfevents.1744779797.aigodmode.1411307.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:af3f3f15bd5aa8ffe18a4f4a9fe299ce211a8efcd994ad6578dc0a90a7cf296c
size 582
runs/Apr15_23-20-35_aigodmode/events.out.tfevents.1744784435.aigodmode.1497045.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:18aff3b782064c46c9073ee0c584e2d8c78c7d74ab1290a28f77322853a2828a
size 8393
runs/Apr15_23-20-35_aigodmode/events.out.tfevents.1744784442.aigodmode.1497045.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a5056c63e5751b899ff0eb8db8ea3c6cc113dc812cd2033a7f1922a6449f4b8c
size 582
runs/Apr15_23-21-41_aigodmode/events.out.tfevents.1744784502.aigodmode.1498147.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:44198eda7e8f8942f947fe63aaeedc680e6100dccb2caa9c5b53ea2f144e52aa
size 9803
runs/Apr15_23-21-41_aigodmode/events.out.tfevents.1744784513.aigodmode.1498147.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a30fbfd74417bed3ed6c0462bc78c7604635f922db61eb06b80cde1fdf063463
size 582
runs/Apr15_23-22-43_aigodmode/events.out.tfevents.1744784564.aigodmode.1499184.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:630efb1b5574425843d6f147f63b66edd4ac7d9c654093d43667c97f0c0640ca
size 13329