prepare datasets core
Browse files
scripts/prepare_core_datasets.py
CHANGED
@@ -13,13 +13,13 @@ tokenizer_path = '../tokenizer'
13 |
14 | seqs = [
15 |     (0, 1073741824, 1025, 16000),
16 | -
17 | -
18 | -
19 | -
20 | -
21 | -
22 | -
23 | ]
24 |
25 | #
13 |
14 | seqs = [
15 |     (0, 1073741824, 1025, 16000),
16 | +    (1025, 2049, 2049, 8000),
17 | +    (2049, 4097, 4097, 4000),
18 | +    (4097, 8193, 8193, 2000),
19 | +    (8193, 16385, 16385, 1000),
20 | +    (16385, 32769, 32769, 500),
21 | +    (32769, 65537, 65537, 250),
22 | +    (65537, 131073, 131073, 125),
23 | ]
24 |
25 | #