import re

import torch
import gradio as gr
from numpy import mean
from openpyxl import load_workbook
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, TextClassificationPipeline
# Load tokenizers and models
# Summarization model
tokenizer = AutoTokenizer.from_pretrained("suriya7/bart-finetuned-text-summarization")
model = AutoModelForSeq2SeqLM.from_pretrained("suriya7/bart-finetuned-text-summarization")

# Keyword-extraction model
tokenizer_keywords = AutoTokenizer.from_pretrained("transformer3/H2-keywordextractor")
model_keywords = AutoModelForSeq2SeqLM.from_pretrained("transformer3/H2-keywordextractor")

# Sentiment/rating classifier
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model = AutoModelForSequenceClassification.from_pretrained('roberta-rating')
new_tokenizer = AutoTokenizer.from_pretrained('roberta-rating')
classifier = TextClassificationPipeline(model=new_model, tokenizer=new_tokenizer, device=device)

label_mapping = {1: '1/5', 2: '2/5', 3: '3/5', 4: '4/5', 5: '5/5'}
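
# Illustrative classifier output (the score shown is hypothetical):
#   classifier("Great product!")  ->  [{'label': 'LABEL_5', 'score': 0.97}]
# label_mapping converts the numeric part of such a label into its "n/5" display string.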
# Function to parse an Excel file into a flat list of non-empty cell values
def parse_xl(file_path):
    cells = []
    workbook = load_workbook(filename=file_path)
    for sheet in workbook.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                if cell.value is not None:
                    cells.append(str(cell.value))  # coerce to str so numeric cells don't break the pipelines
    return cells
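
# Illustrative usage (hypothetical file and values, not part of the app flow):
#   parse_xl("reviews.xlsx")  ->  ["Great battery life", "Shipping was slow", ...]
# i.e. one list entry per non-empty cell across all worksheets.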
# Function to evaluate reviews from an Excel file
def evaluate(file):
    reviews = parse_xl(file)
    ratings = []
    sentiments = []
    text = ""
    for review in reviews:
        label = classifier(review)[0]['label']  # e.g. "LABEL_4"
        ratings.append(int(label.split('_')[1]))
        text += review + " "
        sentiment_label = (
            "Positive" if label in ("LABEL_4", "LABEL_5")
            else "Negative" if label in ("LABEL_1", "LABEL_2")
            else "Neutral"
        )
        sentiments.append(sentiment_label)

    # Majority vote across per-review sentiments
    overall_sentiment = (
        "Positive" if sentiments.count("Positive") > sentiments.count("Negative")
        else "Negative" if sentiments.count("Negative") > sentiments.count("Positive")
        else "Neutral"
    )

    # Summarize the concatenated reviews
    inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=10, max_length=50)
    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    # Rewrite first-person pronouns to third person (whole-word matches only)
    summary = re.sub(r"\bI\b", "He/She", summary)
    summary = re.sub(r"\bmy\b", "his/her", summary)
    summary = re.sub(r"\bme\b", "him/her", summary)

    # Extract keywords from the concatenated reviews
    inputs_keywords = tokenizer_keywords([text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids_keywords = model_keywords.generate(inputs_keywords["input_ids"], num_beams=2, min_length=0, max_length=100)
    keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    return round(mean(ratings), 2), summary, keywords, overall_sentiment
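
# Illustrative usage (hypothetical file and outputs; real values depend on the models):
#   evaluate("reviews.xlsx")  ->  (4.2, "He/She liked the battery but ...", "battery, shipping, price", "Positive")
# i.e. (mean rating, third-person summary, extracted keywords, majority sentiment).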
# Function to test the pipelines on a single text input
def test_area(text):
    # Summarize the input text
    inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=10, max_length=50)
    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    # Rewrite first-person pronouns to third person (whole-word matches only)
    summary = re.sub(r"\bI\b", "He/She", summary)
    summary = re.sub(r"\bmy\b", "his/her", summary)
    summary = re.sub(r"\bme\b", "him/her", summary)

    # Extract keywords
    inputs_keywords = tokenizer_keywords([text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids_keywords = model_keywords.generate(inputs_keywords["input_ids"], num_beams=2, min_length=0, max_length=100)
    keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    # Classify once and derive both the numeric rating and the sentiment label
    label = classifier(text)[0]['label']
    rating = int(label.split('_')[1])
    sentiment_label = (
        "Positive" if label in ("LABEL_4", "LABEL_5")
        else "Negative" if label in ("LABEL_1", "LABEL_2")
        else "Neutral"
    )
    return rating, summary, keywords, sentiment_label
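
# Illustrative usage (hypothetical outputs; real values depend on the models):
#   test_area("I love this phone, the battery lasts two days")
#   ->  (5, "He/She loves the phone ...", "phone, battery", "Positive")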
# Main interface
main_interface = gr.Interface(
    fn=evaluate,
    inputs=gr.File(label="Reviews"),
    outputs=[gr.Textbox(label="Overall Rating"), gr.Textbox(label="Summary"), gr.Textbox(label="Keywords"), gr.Textbox(label="Overall Sentiment")],
    title='Summarize Reviews',
    description="Evaluate and summarize a collection of reviews. Reviews are submitted as an Excel file, where each review is in its own cell."
)
# Testing area interface
testing_interface = gr.Interface(
    fn=test_area,
    inputs=gr.Textbox(label="Input Text"),
    outputs=[gr.Textbox(label="Rating"), gr.Textbox(label="Summary"), gr.Textbox(label="Keywords"), gr.Textbox(label="Sentiment")],
    title='Testing Area',
    description="Test the summarization, keyword extraction, sentiment analysis, and rating on custom text input."
)
# Combine the interfaces into a tabbed layout with a sidebar
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            # Sidebar with placeholder buttons
            gr.Markdown("## Sidebar")
            gr.Button("Button 1")
            gr.Button("Button 2")
        with gr.Column(scale=4):
            iface = gr.TabbedInterface(
                [main_interface, testing_interface],
                ["Summarize Reviews", "Testing Area"]
            )

# share=True requests a temporary public Gradio link in addition to the local server
demo.launch(share=True)