Upload folder using huggingface_hub
- 1_Pooling/config.json +10 -0
- README.md +705 -0
- adapter_config.json +34 -0
- adapter_model.safetensors +3 -0
- config_sentence_transformers.json +10 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +56 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 384,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
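This pooling configuration selects masked mean pooling over the token embeddings (CLS, max, and last-token pooling are all disabled). As a rough sketch, assuming standard Hugging Face tensors (the function and variable names here are illustrative, not part of this repository), the Pooling module computes approximately:

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Average token embeddings while ignoring padding positions.

    token_embeddings: (batch, seq_len, 384) last hidden states
    attention_mask:   (batch, seq_len) with 1 for real tokens, 0 for padding
    """
    mask = attention_mask.unsqueeze(-1).float()     # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)   # (batch, 384)
    counts = mask.sum(dim=1).clamp(min=1e-9)        # avoid division by zero on empty rows
    return summed / counts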
README.md
ADDED
@@ -0,0 +1,705 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:13734
- loss:MultipleNegativesRankingLoss
base_model: intfloat/e5-small-v2
widget:
- source_sentence: predict
  sentences:
  - <scikit-learn `_compute_score_samples` snippet (parallel tree-depth scoring)>
  - <stub `predict` returning `np.zeros(X.shape[0])`>
  - <`test_dist_threshold_invalid_parameters` test for AgglomerativeClustering>
- source_sentence: sklearn tags
  sentences:
  - <`__sklearn_tags__` override setting `tags.input_tags.sparse = True`>
  - <`SelectFdr` class (Benjamini-Hochberg false-discovery-rate filter)>
  - <`test_absolute_error` test for HistGradientBoostingRegressor>
- source_sentence: test lsvc intercept scaling zero
  sentences:
  - <`BaggingClassifier` class definition>
  - <`get_n_splits` returning `self.n_splits`>
  - <`test_lsvc_intercept_scaling_zero` test for LinearSVC>
- source_sentence: test power transformer 1d
  sentences:
  - <`test_power_transformer_1d` test for PowerTransformer>
  - <`test_hdbscan_feature_array` test for HDBSCAN>
  - <`test_pca_initialization_not_compatible_with_sparse_input` test for TSNE>
- source_sentence: Evaluate predicted target values for X relative to y_true
  sentences:
  - <`test_hdbscan_usable_inputs` test for HDBSCAN>
  - <scorer `__call__` method evaluating an estimator's predictions on X against y_true>
  - <`set_inverse_transform_request` stub>
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer based on intfloat/e5-small-v2

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/e5-small-v2](https://huggingface.co/intfloat/e5-small-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [intfloat/e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) <!-- at revision ffb93f3bd4047442299a41ebb6fa998a38507c52 -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 384 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: PeftModelForFeatureExtraction
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```
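The same Transformer → Pooling → Normalize stack can be assembled by hand with the `sentence_transformers.models` API. A minimal sketch, assuming the standard Sentence Transformers module classes (this is not the training code used for this repository):

```python
from sentence_transformers import SentenceTransformer, models

# Rebuild the three-module stack shown above around the base encoder.
word_embedding_model = models.Transformer("intfloat/e5-small-v2", max_seq_length=512)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),  # 384 for e5-small-v2
    pooling_mode_mean_tokens=True,
)
normalize = models.Normalize()

model = SentenceTransformer(modules=[word_embedding_model, pooling_model, normalize])
```

Loading the published model directly, as shown in the Usage section below, yields the same composition, with the LoRA adapter wrapped as `PeftModelForFeatureExtraction`.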

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'Evaluate predicted target values for X relative to y_true',
    ' def __call__(self, estimator, X, y_true, sample_weight=None, **kwargs):\n """Evaluate predicted target values for X relative to y_true.\n\n Parameters\n ----------\n estimator : object\n Trained estimator to use for scoring. Must have a predict_proba\n method; the output of that is used to compute the score.\n\n X : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\n y_true : array-like\n Gold standard target values for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n **kwargs : dict\n Other parameters passed to the scorer. Refer to\n :func:`set_score_request` for more details.\n\n Only available if `enable_metadata_routing=True`. See the\n :ref:`User Guide <metadata_routing>`.\n\n .. versionadded:: 1.3\n\n Returns\n -------\n score : float\n Score function applied to prediction of estimator on X.\n """\n # TODO (1.8): remove in 1.8 (scoring="max_error" has been deprecated in 1.6)\n if self._deprecation_msg is not None:\n warnings.warn(\n self._deprecation_msg, category=DeprecationWarning, stacklevel=2\n )\n\n _raise_for_params(kwargs, self, None)\n\n _kwargs = copy.deepcopy(kwargs)\n if sample_weight is not None:\n _kwargs["sample_weight"] = sample_weight\n\n return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)',
    'def test_hdbscan_usable_inputs(X, kwargs):\n """\n Tests that HDBSCAN works correctly for array-likes and precomputed inputs\n with non-finite points.\n """\n HDBSCAN(min_samples=1, **kwargs).fit(X)',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 13,734 training samples
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence_0 | sentence_1 |
  |:--------|:-----------|:-----------|
  | type    | string     | string     |
  | details | <ul><li>min: 3 tokens</li><li>mean: 8.78 tokens</li><li>max: 63 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 233.15 tokens</li><li>max: 512 tokens</li></ul> |
* Samples:
  | sentence_0 | sentence_1 |
  |:-----------|:-----------|
  | <code>Get the estimator</code> | <code> def _get_estimator(self):<br> """Get the estimator.<br><br> Returns<br> -------<br> estimator_ : estimator object<br> The cloned estimator object.<br> """<br> # TODO(1.8): remove and only keep clone(self.estimator)<br> if self.estimator is None and self.base_estimator != "deprecated":<br> estimator_ = clone(self.base_estimator)<br><br> warn(<br> (<br> "`base_estimator` has been deprecated in 1.6 and will be removed"<br> " in 1.8. Please use `estimator` instead."<br> ),<br> FutureWarning,<br> )<br> # TODO(1.8) remove<br> elif self.estimator is None and self.base_estimator == "deprecated":<br> raise ValueError(<br> "You must pass an estimator to SelfTrainingClassifier. Use `estimator`."<br> )<br> elif self.estimator is not None and self.base_estimator != "deprecated":<br> raise ValueError(<br> "You must p...</code> |
  | <code>Gaussian Naive Bayes (GaussianNB)</code> | <code>class GaussianNB(_BaseNB):<br> """<br> Gaussian Naive Bayes (GaussianNB).<br><br> Can perform online updates to model parameters via :meth:`partial_fit`.<br> For details on algorithm used to update feature means and variance online,<br> see `Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque<br> <http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf>`_.<br><br> Read more in the :ref:`User Guide <gaussian_naive_bayes>`.<br><br> Parameters<br> ----------<br> priors : array-like of shape (n_classes,), default=None<br> Prior probabilities of the classes. If specified, the priors are not<br> adjusted according to the data.<br><br> var_smoothing : float, default=1e-9<br> Portion of the largest variance of all features that is added to<br> variances for calculation stability.<br><br> .. versionadded:: 0.20<br><br> Attributes<br> ----------<br> class_count_ : ndarray of shape (n_classes,)<br> number of training samples observed in each class.<br><br> class_pri...</code> |
  | <code>test rfe cv n jobs</code> | <code>def test_rfe_cv_n_jobs(global_random_seed):<br> generator = check_random_state(global_random_seed)<br> iris = load_iris()<br> X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]<br> y = iris.target<br><br> rfecv = RFECV(estimator=SVC(kernel="linear"))<br> rfecv.fit(X, y)<br> rfecv_ranking = rfecv.ranking_<br><br> rfecv_cv_results_ = rfecv.cv_results_<br><br> rfecv.set_params(n_jobs=2)<br> rfecv.fit(X, y)<br> assert_array_almost_equal(rfecv.ranking_, rfecv_ranking)<br><br> assert rfecv_cv_results_.keys() == rfecv.cv_results_.keys()<br> for key in rfecv_cv_results_.keys():<br> assert rfecv_cv_results_[key] == pytest.approx(rfecv.cv_results_[key])</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
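For reference, a minimal training sketch consistent with the loss and data shape described above; the toy dataset and output handling are illustrative placeholders, not the actual training script used for this model:

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
from sentence_transformers.losses import MultipleNegativesRankingLoss

# Placeholder (query, positive code snippet) pairs, mirroring the sentence_0 / sentence_1 columns.
train_dataset = Dataset.from_dict({
    "sentence_0": ["predict", "sklearn tags"],
    "sentence_1": ["def predict(self, X): ...", "def __sklearn_tags__(self): ..."],
})

model = SentenceTransformer("intfloat/e5-small-v2")
# In-batch negatives ranking loss with cosine similarity and scale 20, as in the parameters above.
loss = MultipleNegativesRankingLoss(model, scale=20.0)

trainer = SentenceTransformerTrainer(model=model, train_dataset=train_dataset, loss=loss)
trainer.train()
```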

### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `num_train_epochs`: 1
- `fp16`: True
- `multi_dataset_batch_sampler`: round_robin

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `tp_size`: 0
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: round_robin

</details>

### Training Logs
| Epoch  | Step | Training Loss |
|:------:|:----:|:-------------:|
| 0.5821 | 500  | 0.6129        |

### Framework Versions
- Python: 3.11.12
- Sentence Transformers: 3.4.1
- Transformers: 4.51.3
- PyTorch: 2.6.0+cu124
- Accelerate: 1.6.0
- Datasets: 3.5.1
- Tokenizers: 0.21.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
adapter_config.json
ADDED
@@ -0,0 +1,34 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "intfloat/e5-small-v2",
  "bias": "none",
  "corda_config": null,
  "eva_config": null,
  "exclude_modules": null,
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
  "lora_bias": false,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 4,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "dense_output",
    "dense"
  ],
  "task_type": "FEATURE_EXTRACTION",
  "trainable_token_indices": null,
  "use_dora": false,
  "use_rslora": false
}
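The adapter configuration above describes a small LoRA adapter (rank 4, alpha 16, dropout 0.05) applied to the `dense` and `dense_output` modules of the base encoder, for feature extraction. A hedged sketch of attaching it to the base model with PEFT (the local path is a placeholder for wherever this repository is checked out):

```python
from peft import PeftModel
from transformers import AutoModel, AutoTokenizer

# Base encoder that the adapter was trained on top of.
base = AutoModel.from_pretrained("intfloat/e5-small-v2")
# "path/to/this/repo" is a placeholder for the directory containing
# adapter_config.json and adapter_model.safetensors.
model = PeftModel.from_pretrained(base, "path/to/this/repo")
tokenizer = AutoTokenizer.from_pretrained("intfloat/e5-small-v2")
```

Loading through `SentenceTransformer` with `peft` installed should resolve the adapter automatically, which matches the `PeftModelForFeatureExtraction` wrapper shown in the README architecture.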
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:201f5edf9bb75d0229a0e91fe4c08d9660c377c47e26ccc6ab750e49a82b4035
size 906680
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.4.1",
    "transformers": "4.51.3",
    "pytorch": "2.6.0+cu124"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 512,
  "do_lower_case": false
}
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,56 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
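The tokenizer is a standard lowercasing BERT WordPiece tokenizer with the usual `[CLS]`/`[SEP]`/`[PAD]`/`[UNK]`/`[MASK]` special tokens and a 512-token limit. A quick sketch of loading and inspecting it (the local path is a placeholder for this repository; the tokenizer is presumably identical to the intfloat/e5-small-v2 one):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/this/repo")  # placeholder path
encoded = tokenizer("Evaluate predicted target values", truncation=True, max_length=512)
# Lowercased WordPiece tokens, wrapped in [CLS] ... [SEP]
print(tokenizer.convert_ids_to_tokens(encoded["input_ids"]))
```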
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff