{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Summarize Reviews\n",
    "\n",
    "Gradio app that rates, summarizes, and extracts keywords from a collection of reviews submitted as an Excel workbook (one review per cell)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, TextClassificationPipeline\n",
    "import torch\n",
    "import gradio as gr\n",
    "from openpyxl import load_workbook\n",
    "from numpy import mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model loading — run once, up front.\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "# Summarization model\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"suriya7/bart-finetuned-text-summarization\")\n",
    "model = AutoModelForSeq2SeqLM.from_pretrained(\"suriya7/bart-finetuned-text-summarization\").to(device)\n",
    "\n",
    "# Keyword-extraction model\n",
    "tokenizer_keywords = AutoTokenizer.from_pretrained(\"transformer3/H2-keywordextractor\")\n",
    "model_keywords = AutoModelForSeq2SeqLM.from_pretrained(\"transformer3/H2-keywordextractor\").to(device)\n",
    "\n",
    "# Fine-tuned rating classifier; emits labels of the form LABEL_1 .. LABEL_5\n",
    "new_model = AutoModelForSequenceClassification.from_pretrained('roberta-rating')\n",
    "new_tokenizer = AutoTokenizer.from_pretrained('roberta-rating')\n",
    "\n",
    "# Classification pipeline (runs the rating model on the selected device)\n",
    "classifier = TextClassificationPipeline(model=new_model, tokenizer=new_tokenizer, device=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_xl(file_path):\n",
    "    \"\"\"Return every non-empty cell value from all sheets of an Excel workbook.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file_path : str or path-like\n",
    "        Path to an .xlsx/.xlsm/.xltx/.xltm file readable by openpyxl.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        Raw cell values (may be str, int, float, etc.) in sheet/row order.\n",
    "    \"\"\"\n",
    "    cells = []\n",
    "    workbook = load_workbook(filename=file_path)\n",
    "    for sheet in workbook.worksheets:\n",
    "        for row in sheet.iter_rows():\n",
    "            for cell in row:\n",
    "                # 'is not None' (not '!= None') so falsy values like 0 are kept\n",
    "                if cell.value is not None:\n",
    "                    cells.append(cell.value)\n",
    "    return cells\n",
    "\n",
    "\n",
    "def evaluate(file):\n",
    "    \"\"\"Rate, summarize, and extract keywords from the reviews in an Excel file.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file : str or path-like\n",
    "        Path to the uploaded workbook (as provided by gr.File).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        (mean rating rounded to 2 decimals, summary text, keyword text).\n",
    "        Returns (\"N/A\", \"\", \"\") when the workbook contains no reviews.\n",
    "    \"\"\"\n",
    "    reviews = parse_xl(file)\n",
    "\n",
    "    ratings = []\n",
    "    texts = []\n",
    "    for review in reviews:\n",
    "        review = str(review)  # cells may hold non-string values (numbers, dates)\n",
    "        # classifier labels look like 'LABEL_3' -> rating 3\n",
    "        ratings.append(int(classifier(review)[0]['label'].split('_')[1]))\n",
    "        texts.append(review)\n",
    "\n",
    "    # Guard the empty workbook: mean([]) would yield nan\n",
    "    if not ratings:\n",
    "        return \"N/A\", \"\", \"\"\n",
    "\n",
    "    text = \" \".join(texts)\n",
    "\n",
    "    # Summarize the concatenated reviews\n",
    "    inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
    "    summary_ids = model.generate(inputs[\"input_ids\"].to(device), num_beams=2, min_length=50, max_length=1000)\n",
    "    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]\n",
    "\n",
    "    # Extract keywords from the same text\n",
    "    inputs_keywords = tokenizer_keywords([text], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
    "    summary_ids_keywords = model_keywords.generate(inputs_keywords[\"input_ids\"].to(device), num_beams=2, min_length=0, max_length=100)\n",
    "    keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]\n",
    "\n",
    "    return round(mean(ratings), 2), summary, keywords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gradio interface: upload an Excel workbook, get rating / summary / keywords\n",
    "iface = gr.Interface(\n",
    "    fn=evaluate,\n",
    "    inputs=gr.File(label=\"Reviews\", file_types=[\".xlsx\", \".xlsm\", \".xltx\", \".xltm\"]),\n",
    "    outputs=[gr.Textbox(label=\"Rating\"), gr.Textbox(label=\"Summary\"), gr.Textbox(label=\"Keywords\")],\n",
    "    title='Summarize Reviews',\n",
    "    description=\"Evaluate and summarize a collection of reviews. Reviews are submitted as an Excel file, where each review is in its own cell.\"\n",
    ")\n",
    "\n",
    "iface.launch(share=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "SolutionsInPR",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}