Spaces:
Runtime error
Runtime error
Upload 7 files
Browse files- app/data_processing/data_processing.py +8 -0
- app/main.py +17 -0
- app/model_inference/model_inference.py +6 -0
- app/report_generation/report_generation.py +10 -0
- app/visualizations/visualizations.py +13 -0
- config.py +6 -0
- requirements.txt +6 -5
app/data_processing/data_processing.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from bs4 import BeautifulSoup
import requests


def extract_data_from_html(url):
    """Fetch *url* and return the page's visible text content.

    Parameters
    ----------
    url : str
        Address of the HTML page to scrape.

    Returns
    -------
    str
        All text extracted from the parsed HTML document.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    # Timeout keeps the Gradio worker from hanging forever on a dead host;
    # raise_for_status surfaces HTTP failures instead of silently parsing
    # an error page as if it were real content.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.get_text()
|
app/main.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import gradio as gr

# NOTE(review): the modules live at app/<pkg>/<pkg>.py and no __init__.py
# re-exports are visible, so `from app.data_processing import ...` fails at
# import time (likely the Space's "Runtime error"). Importing the inner
# module directly works in both layouts.
from app.data_processing.data_processing import extract_data_from_html
from app.model_inference.model_inference import run_inference
from app.report_generation.report_generation import generate_report
from app.visualizations.visualizations import plot_trends


def process_data_and_generate_report(url):
    """End-to-end pipeline: scrape *url*, analyse the text, build outputs.

    Parameters
    ----------
    url : str
        Page address typed into the Gradio text input.

    Returns
    -------
    tuple
        (report, plot): the generated report text and the figure that the
        "plot" output component renders.
    """
    raw_data = extract_data_from_html(url)
    analysis_results = run_inference(raw_data)
    report = generate_report(analysis_results)
    plot = plot_trends(analysis_results)
    return report, plot


iface = gr.Interface(
    fn=process_data_and_generate_report,
    inputs="text",
    outputs=["text", "plot"],
)

# Launch only when executed as a script, so importing app.main (e.g. from
# tests) stays free of side effects.
if __name__ == "__main__":
    iface.launch()
|
app/model_inference/model_inference.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from functools import lru_cache

from transformers import pipeline

# NER checkpoint used for entity extraction.
_NER_MODEL = "dbmdz/bert-large-cased-finetuned-conll03-english"


@lru_cache(maxsize=1)
def _get_ner_pipeline():
    """Build the NER pipeline exactly once.

    The original code downloaded/loaded BERT-large on EVERY call, which is
    prohibitively slow for a web app; caching loads it on first use only.
    """
    return pipeline("ner", model=_NER_MODEL)


def run_inference(text):
    """Run named-entity recognition over *text*.

    Parameters
    ----------
    text : str
        Raw text to analyse.

    Returns
    -------
    list
        The transformers NER pipeline output (one dict per detected entity).
    """
    return _get_ner_pipeline()(text)
|
app/report_generation/report_generation.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from functools import lru_cache

from transformers import GPT2LMHeadModel, GPT2Tokenizer


@lru_cache(maxsize=1)
def _load_gpt2():
    """Load GPT-2 and its tokenizer once; per-call loading dominates runtime."""
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    return model, tokenizer


def generate_report(data):
    """Generate a natural-language report about *data* with GPT-2.

    Parameters
    ----------
    data : object
        Analysis results; stringified and embedded in the prompt.

    Returns
    -------
    str
        The decoded GPT-2 completion (prompt included, special tokens
        stripped).
    """
    input_text = "Generate a detailed report about the following: " + str(data)
    model, tokenizer = _load_gpt2()
    # Truncate the prompt: NER output can be arbitrarily long, and a prompt
    # longer than max_length makes model.generate() fail outright.
    inputs = tokenizer.encode(
        input_text, return_tensors="pt", truncation=True, max_length=128
    )
    outputs = model.generate(inputs, max_length=150)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
app/visualizations/visualizations.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import matplotlib.pyplot as plt


def plot_trends(data):
    """Plot product prices per model as a line chart.

    Parameters
    ----------
    data : iterable of dict
        Each item must provide a 'model' name and a 'price' string containing
        a number with an optional hryvnia sign ('₴').
        NOTE(review): run_inference() returns NER entity dicts, which do NOT
        carry these keys — confirm what this function is actually fed; prices
        with internal thousands separators (e.g. "1 299") will also fail
        float() — verify upstream formatting.

    Returns
    -------
    matplotlib.figure.Figure
        A NEW figure per call. The original drew on the process-global pyplot
        state and returned the `plt` module, so concurrent/successive requests
        accumulated lines on one shared figure.
    """
    models = [item['model'] for item in data]
    prices = [float(item['price'].replace('₴', '').strip()) for item in data]
    fig, ax = plt.subplots()
    ax.plot(models, prices, marker='o', color='b')
    ax.set_title('Price Trends of Products')
    ax.set_xlabel('Models')
    ax.set_ylabel('Price')
    ax.tick_params(axis='x', rotation=45)
    fig.tight_layout()
    return fig
|
config.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os

# Project-wide configuration constants.

# Directory holding raw scraped data.
DATA_PATH = "data/raw_data/"
# Directory for cleaned/derived data.
PROCESSED_DATA_PATH = "data/processed_data/"
# Default Hugging Face model checkpoint name.
MODEL_NAME = "bert-large-cased"
# Read the token from the environment instead of committing a secret to the
# repo; the original placeholder remains the fallback for compatibility.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN", "your_huggingface_api_token")
|
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
|
|
|
1 |
+
|
2 |
+
transformers==4.30.0
|
3 |
+
gradio==3.24.1
|
4 |
+
matplotlib==3.7.1
|
5 |
+
requests==2.28.2
|
6 |
+
beautifulsoup4==4.11.1
torch==2.0.1
|