Spaces:
Running
Running
Delete Transaction_Error_LSTM_LSTM_weight_Raznost_steps_per_epoch.py
Browse files
Transaction_Error_LSTM_LSTM_weight_Raznost_steps_per_epoch.py
DELETED
@@ -1,577 +0,0 @@
|
|
1 |
-
# Модель связывает характер расстановки параметров управления трназакциями по публичным методам маркированных классов (архитектурно значимых)
|
2 |
-
# с количеством фиксируемых ошибок в работе кода.
|
3 |
-
# На базе LSTM + LSTM + sample_weight, веса разности целевых значений и суммы параметров с нормированием суммой всех параметров
|
4 |
-
# steps_per_epoch = 2000
|
5 |
-
|
6 |
-
# импорт библиотек
|
7 |
-
import os
|
8 |
-
import warnings
|
9 |
-
warnings.filterwarnings(action='ignore')
|
10 |
-
import pickle
|
11 |
-
import pandas as pd
|
12 |
-
import numpy as np
|
13 |
-
import copy
|
14 |
-
from sklearn.model_selection import cross_validate
|
15 |
-
from sklearn.base import BaseEstimator, TransformerMixin
|
16 |
-
from sklearn.utils import check_array
|
17 |
-
from sklearn.impute import SimpleImputer
|
18 |
-
from sklearn.feature_selection import SelectFwe, f_regression, VarianceThreshold
|
19 |
-
from sklearn.cluster import FeatureAgglomeration
|
20 |
-
from sklearn.linear_model import LassoLarsCV, LassoCV
|
21 |
-
from sklearn.decomposition import PCA
|
22 |
-
from sklearn.ensemble import ExtraTreesRegressor
|
23 |
-
from sklearn.pipeline import Pipeline
|
24 |
-
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
|
25 |
-
|
26 |
-
from sklearn.ensemble import RandomForestRegressor
|
27 |
-
from sklearn.ensemble import GradientBoostingRegressor
|
28 |
-
from sklearn.ensemble import HistGradientBoostingRegressor
|
29 |
-
from sklearn.ensemble import StackingRegressor
|
30 |
-
from sklearn.linear_model import RidgeCV
|
31 |
-
from sklearn.svm import LinearSVR
|
32 |
-
from sklearn.linear_model import LinearRegression
|
33 |
-
from sklearn.ensemble import VotingRegressor
|
34 |
-
from sklearn.neighbors import KNeighborsRegressor
|
35 |
-
|
36 |
-
from sklearn import preprocessing
|
37 |
-
from sklearn import utils
|
38 |
-
from sklearn.naive_bayes import GaussianNB
|
39 |
-
from sklearn.linear_model import LogisticRegression
|
40 |
-
from sklearn.tree import DecisionTreeClassifier
|
41 |
-
from sklearn.ensemble import RandomForestClassifier
|
42 |
-
from sklearn.neighbors import KNeighborsClassifier
|
43 |
-
|
44 |
-
import tensorflow as tf
|
45 |
-
import tensorflow_hub as hub
|
46 |
-
import tensorflow_datasets as tfds
|
47 |
-
|
48 |
-
from tensorflow import keras
|
49 |
-
from tensorflow.keras import layers
|
50 |
-
import matplotlib.pyplot as plt
|
51 |
-
|
52 |
-
#from keras.utils.vis_utils import plot_model
|
53 |
-
from keras.utils import plot_model
|
54 |
-
|
55 |
-
import math as mt
|
56 |
-
import array
|
57 |
-
|
58 |
-
from tensorflow.python.framework import ops
|
59 |
-
import urllib.request as request
|
60 |
-
|
61 |
-
from sklearn import preprocessing
|
62 |
-
from sklearn.preprocessing import MinMaxScaler # Масштабирование данных в заданном диапазоне
|
63 |
-
|
64 |
-
# Mount Google Drive (Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

# Experiment configuration ----------------------------------------------------
num_increment_cycles = 0    # augmentation cycles; 0 disables augmentation (was 20)
SKO_random_normal = 0       # std-dev of Gaussian noise used for augmentation (was 5)
model_name = 'Model_Transaction_Error.keras'  # file the trained model is saved to
img_file = 'Structure_model.png'              # file the model diagram is rendered to
inp_file = 'inp_monitoring-dataset.xlsx'      # input file for single-value prediction
K_input_output = 1          # weight of input signals at the model output (was 0.5)
kernel_size_ID = 4          # convolution window (unused by the current LSTM model)
LSTM_units_ID = 150         # units per LSTM layer
mlp_dropout_ID = 0.3        # dropout rate after each LSTM
epochs_ID = 1000            # max training epochs (early stopping applies)
batch_size_ID = 100
steps_per_epoch_ID = 2000   # per the experiment name; currently commented out in fit()
Dobavka = 0                 # additive offset applied before [0, 1] scaling
# -----------------------------------------------------------------------------
|
83 |
-
|
84 |
-
# версии библиотек
|
85 |
-
import sklearn
|
86 |
-
import pandas
|
87 |
-
import numpy
|
88 |
-
import tensorflow
|
89 |
-
|
90 |
-
print(f'{sklearn.__version__=}')
|
91 |
-
print(f'{pandas.__version__=}')
|
92 |
-
print(f'{numpy.__version__=}')
|
93 |
-
print(f'{tensorflow.__version__=}')
|
94 |
-
|
95 |
-
from google.colab import files
|
96 |
-
#uploaded = files.upload() # Загрузка файла с компьютера
|
97 |
-
|
98 |
-
# Вспомогательные функции и классы
|
99 |
-
def evaluate_pipe_cv(model, X, y):
    """Cross-validate *model* on (X, y) and print mean +/- std of R^2 and RMSE."""
    scores = cross_validate(
        model,
        X,
        y,
        scoring=('r2', 'neg_mean_squared_error'),
    )

    r2_scores = scores['test_r2']
    # cross_validate reports *negative* MSE per fold; |.|**0.5 gives fold RMSE.
    rmse_scores = np.sqrt(np.abs(scores['test_neg_mean_squared_error']))

    print(
        f'r2 = {r2_scores.mean():.3f} +/- {r2_scores.std():.3f}\n'
        f'rmse = {rmse_scores.mean():.3f} +/- {rmse_scores.std():.3f}'
    )
|
120 |
-
|
121 |
-
class ZeroCount(BaseEstimator, TransformerMixin):
    """Prepend two derived columns to X: the per-row count of zero entries
    and the per-row count of non-zero entries (zeros first, then non-zeros,
    then the original features)."""

    def fit(self, X, y=None):
        # Stateless transformer: nothing to learn.
        return self

    def transform(self, X, y=None):
        X = check_array(X)
        n_features = X.shape[1]

        nonzero_counts = np.count_nonzero(X, axis=1)
        nonzero_col = nonzero_counts.reshape(-1, 1)
        zero_col = (n_features - nonzero_counts).reshape(-1, 1)

        # Column layout: [zero-count, non-zero-count, original features].
        return np.hstack((zero_col, nonzero_col, np.copy(X)))
|
143 |
-
|
144 |
-
class StackingEstimator(BaseEstimator, TransformerMixin):
    """Fit an inner estimator and prepend its predictions to X as a new
    first column, so a downstream model can consume them as a feature."""

    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y=None, **fit_params):
        self.estimator.fit(X, y, **fit_params)
        return self

    def transform(self, X):
        X = check_array(X)
        preds = self.estimator.predict(X).reshape(-1, 1)
        # Column layout: [prediction, original features].
        return np.hstack((preds, np.copy(X)))
|
164 |
-
|
165 |
-
def print_result_regression(y_true, y_pred):
    """Print R^2 / MAE / RMSE for a regression result and return the three
    metrics as strings formatted to four decimal places."""
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    # mean_squared_error returns MSE; take the square root for RMSE.
    rmse = mean_squared_error(y_true, y_pred) ** 0.5

    print(f'''
    r2: {r2:.4f}
    mae: {mae:.4f}
    rmse: {rmse:.4f}
    ''')

    return f'{r2:.4f}', f'{mae:.4f}', f'{rmse:.4f}'
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
# используемые признаки
|
180 |
-
#cat_features = [
|
181 |
-
# 'StendName',
|
182 |
-
# #'OpenedCount',
|
183 |
-
# #'AllReopenedCount',
|
184 |
-
# #'linksSum',
|
185 |
-
# #'TermTimeMax',
|
186 |
-
# #'TermTimeMin',
|
187 |
-
# 'bitbucketRepo_name',
|
188 |
-
# 'jira_key',
|
189 |
-
# 'bb_key',
|
190 |
-
#]
|
191 |
-
|
192 |
-
# cat_features = [
|
193 |
-
# 'projectKey',
|
194 |
-
# #'_class',
|
195 |
-
# #'_id',
|
196 |
-
# ]
|
197 |
-
|
198 |
-
# Target column and numeric feature columns.  Each feature is the count of a
# transaction-control marker on public methods of marked (architecturally
# significant) classes; every marker also has a "_LACK" complement column.
target = 'openedCount'

_TX_MARKERS = [
    'COMPONENT',
    'CONTROLLER',
    'CONTROLLER_DELETE',
    'CONTROLLER_GET',
    'CONTROLLER_PUTPOSTPATCH',
    'DEFAULT',
    'MANDATORY',
    'NESTED',
    'NEVER',
    'NOT_SUPPORTED',
    'READ_COMMITTED',
    'READ_UNCOMMITTED',
    'REPEATABLE_READ',
    'REPOSITORY',
    'REQUIRED',
    'REQUIRES_NEW',
    'SERIALIZABLE',
    'SERVICE',
    'SUPPORTS',
]

# Expands to the same 38-element list as the original hand-written literal:
# for each marker, the base column immediately followed by its _LACK column.
num_features = [
    f'markers.TX_METHOD_{marker}{suffix}'
    for marker in _TX_MARKERS
    for suffix in ('', '_LACK')
]
|
239 |
-
|
240 |
-
# Load the dataset (earlier experiments read other CSV/XLSX sources).
#data = pd.read_csv('dataset_sberstack.csv')
#data = pd.read_csv('monitoring-dataset.csv')
#data = pd.read_excel('monitoring-dataset.xlsx')
data = pd.read_excel('Transaction_Error.xlsx')
data = data.astype(float)
print('data.shape = ', data.shape)

# Dataset augmentation by random perturbation of the original rows -----------
data_ = copy.deepcopy(data)    # working copy, perturbed on every cycle
data__ = copy.deepcopy(data)   # pristine copy used to reset the working copy
|
252 |
-
|
253 |
-
# Генерация случайных значений в окрестности исходных значений
|
254 |
-
def random_value(x):
    """Return *x* perturbed by Gaussian noise and rounded to a whole number.

    The noise is drawn with mean ``x`` and standard deviation
    ``SKO_random_normal`` (module-level setting).  Negative draws are
    mirrored with ``abs`` so perturbed counts stay non-negative.

    Fix: the original drew a size-1 array (``np.random.normal(x, sd, 1)``)
    and relied on NumPy's implicit size-1-array->scalar conversion, which is
    deprecated and raises on NumPy >= 1.25.  Drawing a scalar directly is
    distributionally identical and future-proof.
    """
    value = np.random.normal(x, SKO_random_normal)  # scalar draw
    if value < 0:
        value = abs(value)
    return np.float64(np.round(value))
|
258 |
-
|
259 |
-
# Grow the dataset: each cycle perturbs every feature and the target of the
# working copy, then appends the perturbed rows to the accumulated dataset.
np.random.seed(0)  # reproducible augmentation
for _ in range(num_increment_cycles):
    for col in num_features:
        data_[col] = data_[col].apply(random_value)
    data_[target] = data_[target].apply(random_value)

    # Fix: pd.concat replaces the private DataFrame._append (the public
    # .append was removed in pandas 2.0); with default ignore_index=False
    # the result is identical.
    data = pd.concat([data, data_])
    data_ = copy.deepcopy(data__)  # reset the working copy to the original rows
# ----------------------------------------------------------------------------
|
268 |
-
|
269 |
-
from sklearn.model_selection import train_test_split

# Split columns into the feature matrix and the target vector.
X = data.drop(columns=[target])
y = data[target]
print('X.shape = ', X.shape,' y.shape = ', y.shape)

# Convert to NumPy and scale both X and y into [0, 1] by their global maxima.
# max_X / max_y are kept so predictions can be rescaled back to real units;
# Dobavka is an optional additive offset applied before scaling (0 = no-op).
X = np.array(X) + Dobavka
y = np.array(y) + Dobavka

max_X = np.max(X)
X = X / max_X
print('max_X = ', max_X)
print('X = ', X)

max_y = np.max(y)
y = y / max_y
print('max_y = ', max_y)
print('y = ', y)

# Hold out 15% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
print('X_train.shape = ', X_train.shape,' y_train.shape = ', y_train.shape)
print('X_test.shape = ', X_test.shape,' y_test.shape = ', y_test.shape)
|
328 |
-
|
329 |
-
# Build per-sample training weights from |sum(features) - target|, normalised
# by the grand total, so rows whose feature sum deviates most from the target
# get weights slightly above the base weight of 1.
sample_weight = np.ones(shape=y_train.shape[0],)   # base weight 1 for every row
Raznost_y = np.sum(X_train, axis=1, dtype=float)   # per-row sum of scaled features
Raznost_y = Raznost_y - y_train                    # deviation of the sum from the target
Raznost_y = np.absolute(Raznost_y)
Summa = np.sum(Raznost_y)
Raznost_y = Raznost_y / Summa                      # normalise deviations to sum to 1
# BUG FIX: the original assigned the result to a misspelled name
# ('ample_weight'), so model.fit() silently trained with all weights = 1.0.
sample_weight = sample_weight + Raznost_y
|
341 |
-
|
342 |
-
# LSTM layers expect 3-D input: (samples, timesteps, indicators).  Each feature
# column is treated as one timestep of a single indicator, giving the shape
# (n_rows, n_features, 1); the targets stay one-dimensional.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
y_train = y_train.reshape(y_train.shape[0])
y_test = y_test.reshape(y_test.shape[0])
print('X_train.shape = ', X_train.shape,' y_train.shape = ', y_train.shape)
print('X_test.shape = ', X_test.shape,' y_test.shape = ', y_test.shape)
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
# Модель
|
357 |
-
# Функция создания модели --------------------------------------------------------------------------------------------------------
|
358 |
-
def build_model(
        input_shape,
        LSTM_units,
        mlp_dropout):
    """Build the regression network: two parallel LSTM branches merged with a
    residual connection, a final LSTM head, and one linear output unit.

    Topology: inputs -> (LSTM+Dropout) x 2 in parallel ->
    add(branch_a, branch_b, inputs) -> LSTM+Dropout -> Dense(1).
    """
    inputs = keras.Input(shape=input_shape)

    def lstm_branch(tensor):
        # Sequence-to-sequence LSTM followed by dropout regularisation.
        out = layers.LSTM(units=LSTM_units, return_sequences=True)(tensor)
        return layers.Dropout(mlp_dropout)(out)

    # Two structurally identical parallel branches over the same input.
    branch_a = lstm_branch(inputs)
    branch_b = lstm_branch(inputs)

    # Residual-style merge of both branches plus the raw inputs.
    merged = layers.add([branch_a, branch_b, inputs])

    # Final LSTM collapses the sequence dimension; then dropout and a single
    # linear unit (no activation) for regression output.
    head = layers.LSTM(units=LSTM_units)(merged)
    head = layers.Dropout(mlp_dropout)(head)
    outputs = layers.Dense(units=1)(head)

    return keras.Model(inputs, outputs)
|
391 |
-
# --------------------------------------------------------------------------------------------------------------------------------
|
392 |
-
|
393 |
-
# Instantiate and compile the model -------------------------------------------
Input_shape = X_train.shape[1:]  # (n_features, 1)
print('Input_shape = ', Input_shape, '\n')

model = build_model(
    input_shape=Input_shape,
    LSTM_units=LSTM_units_ID,
    mlp_dropout=mlp_dropout_ID,
)

# MSE loss for regression; MAE reported as a human-readable side metric.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss="mean_squared_error",
    metrics=["mean_absolute_error"],
)

model.summary()
# Render the network topology to img_file for documentation.
keras.utils.plot_model(model, to_file=img_file, show_shapes=True, show_layer_names=True)
|
412 |
-
|
413 |
-
|
414 |
-
# Training callbacks: checkpoint the best model, shrink the learning rate on
# plateaus, stop early when validation loss stalls, and log for TensorBoard.
callbacks = [
    # Keep only the model with the lowest validation loss seen so far.
    keras.callbacks.ModelCheckpoint(
        model_name,
        save_best_only=True,
        monitor="val_loss",
        mode='min',
    ),
    # Multiply the learning rate by 0.75 after 15 stagnant epochs,
    # never going below 1e-7.
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.75,
        patience=15,
        min_lr=1e-7,
    ),
    # Abort training after 50 stagnant epochs, restoring the best weights.
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        mode='min',
        patience=50,
        restore_best_weights=True,
        verbose=1,
    ),
    keras.callbacks.TensorBoard(log_dir='log'),
]
|
438 |
-
|
439 |
-
|
440 |
-
'''
|
441 |
-
# Загрузка модели из файла model_name для дообучения ------------------------------------------------
|
442 |
-
print()
|
443 |
-
print('Загрузка модели из файла model_name для дообучения \n')
|
444 |
-
model = keras.models.load_model(model_name)
|
445 |
-
#model.load_weights(model_name, by_name=True) # Загрузка только весов модели
|
446 |
-
|
447 |
-
model.compile(
|
448 |
-
optimizer=keras.optimizers.Adam(learning_rate=5.6250e-04),
|
449 |
-
loss="mean_squared_error",
|
450 |
-
metrics=["mean_absolute_error"],
|
451 |
-
)
|
452 |
-
# ----------------------------------------------------------------------------------------------------
|
453 |
-
'''
|
454 |
-
|
455 |
-
|
456 |
-
# Train the model.  20% of the training rows are held out for validation each
# epoch; sample_weight biases the loss toward rows flagged by the weighting
# scheme built above.
history = model.fit(
    X_train,
    y_train,
    sample_weight=sample_weight,
    validation_split=0.2,
    epochs=epochs_ID,
    batch_size=batch_size_ID,
    callbacks=callbacks,
    shuffle=True,
    verbose=1,
)
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
#*************************************************************************************************************
# Load the trained model from disk and evaluate it
#*************************************************************************************************************
print()
print('Загрузка готовой модели из файла или папки на гугл-диске \n')
model_ = keras.models.load_model(model_name)

# Predict over the FULL dataset (not only the held-out test split).
predictions = model_.predict(X)

# Undo the [0, 1] scaling so targets and predictions are in original units.
y = y * max_y
y = y - Dobavka
predictions = predictions * max_y
predictions = predictions - Dobavka

# Error counts cannot be negative: mirror any negative prediction.
# Vectorised replacement for the original element-wise Python loop; abs() of
# a non-negative value is a no-op, so the result is identical.
predictions = np.abs(predictions)

# Quality metrics over the whole dataset.
r2, mae, rmse = print_result_regression(y, predictions)

# Show the last (up to) 30 target/prediction pairs.
print(' Цель Прогноз Отклонение')
# BUG FIX: guard the start index — the original 'len(predictions)-30' went
# negative on datasets with fewer than 30 rows, silently re-printing
# wrapped-around rows via negative indexing.
for i in range(max(0, len(predictions) - 30), len(predictions)):
    print(f''' {y[i]:9.4f} {predictions[i][0]:9.4f} {abs(y[i] - predictions[i][0]):9.4f} ''')
|
509 |
-
|
510 |
-
# Отклонения прогноза от целевых значений
|
511 |
-
# Delta_y = y - predictions # Недостаточно оперативной памяти 12.7 Гб для вычисления
|
512 |
-
# print(Delta_y)
|
513 |
-
#print(y - predictions)
|
514 |
-
|
515 |
-
|
516 |
-
# Сохранение отклонений в файл
|
517 |
-
#np.savetxt('Transaction_Error_Delta_y.csv', Delta_y, delimiter=",")
|
518 |
-
# np.savetxt('Transaction_Error_Delta_y.csv', predictions, delimiter=",")
|
519 |
-
# print('\n Файл Delta_y сохранён.')
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
'''
|
524 |
-
#*************************************************************************************************************
|
525 |
-
# Прогноз одного целевого значения
|
526 |
-
#*************************************************************************************************************
|
527 |
-
# Загрузка готовой модели из файла или папки
|
528 |
-
#*************************************************************************************************************
|
529 |
-
print()
|
530 |
-
print('Загрузка готовой модели из файла или папки на гугл-диске \n')
|
531 |
-
model_ = keras.models.load_model(model_name)
|
532 |
-
#model.summary()
|
533 |
-
|
534 |
-
# Загрузка файла с входными данными для прогноза
|
535 |
-
print('Загрузка файла с входными данными для прогноза \n')
|
536 |
-
inp_data = pd.read_excel(inp_file)
|
537 |
-
inp_data = inp_data.astype(float)
|
538 |
-
print('inp_data.shape = ', inp_data.shape)
|
539 |
-
|
540 |
-
# Преобразование inp_data в массив numpy
|
541 |
-
inp_data = np.array(inp_data)
|
542 |
-
print('inp_data = ', inp_data)
|
543 |
-
|
544 |
-
# Масштабирование обучающих данных в диапазоне 0 и 1. max_X - загружается вместе с готовой моделью
|
545 |
-
inp_data = inp_data / max_X
|
546 |
-
#inp_data = scaler_X.fit_transform(inp_data)
|
547 |
-
# scaler_inp_data = MinMaxScaler(feature_range = (0, 1))
|
548 |
-
# inp_data = scaler_inp_data.fit_transform(inp_data)
|
549 |
-
print('inp_data = ', inp_data)
|
550 |
-
|
551 |
-
# Возврат масштабированных прогнозных данных обратно к их фактическим значениям.
|
552 |
-
#inp_data = scaler_X.inverse_transform(inp_data)
|
553 |
-
#inp_data = scaler_inp_data.inverse_transform(inp_data)
|
554 |
-
#print('inp_data = ', inp_data)
|
555 |
-
|
556 |
-
# Чтобы использовать модель LSTM нужно преобразовать данные в форму, принятую LSTM.
|
557 |
-
inp_data = np.reshape(inp_data, (inp_data.shape[0], inp_data.shape[1], 1))
|
558 |
-
print('inp_data.shape = ', inp_data.shape) # inp_data.shape = (1, 42, 1)
|
559 |
-
|
560 |
-
# Прогнозирование целевого значения ************************************************************
|
561 |
-
predictions = model_.predict(inp_data)
|
562 |
-
print('Целевое значение = ', predictions)
|
563 |
-
|
564 |
-
# Возврат масштабированных прогнозных данных обратно к их фактическим значениям. . max_y - загружается вместе с готовой моделью
|
565 |
-
predictions = predictions * max_y
|
566 |
-
#predictions = predictions.reshape(-1, 1)
|
567 |
-
#predictions = scaler_y.inverse_transform(predictions) # *996
|
568 |
-
|
569 |
-
# Не должно быть отрицательных значений
|
570 |
-
if predictions < 0: predictions = 0
|
571 |
-
#print('y = ', predictions)
|
572 |
-
|
573 |
-
# Вывод целевого значения
|
574 |
-
print('Целевое значение = ', predictions)
|
575 |
-
'''
|
576 |
-
|
577 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|