Jan90 committed on
Commit
3fb595f
·
verified ·
1 Parent(s): 8cb2f26

Upload final_code.py

Browse files
Files changed (1) hide show
  1. final_code.py +99 -0
final_code.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# Load model directly\n",
10
+ "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, TextClassificationPipeline\n",
11
+ "import torch\n",
12
+ "import gradio as gr\n",
13
+ "from openpyxl import load_workbook\n",
14
+ "from numpy import mean\n",
15
+ "\n",
16
+ "tokenizer = AutoTokenizer.from_pretrained(\"suriya7/bart-finetuned-text-summarization\")\n",
17
+ "model = AutoModelForSeq2SeqLM.from_pretrained(\"suriya7/bart-finetuned-text-summarization\")\n",
18
+ "\n",
19
+ "tokenizer_keywords = AutoTokenizer.from_pretrained(\"transformer3/H2-keywordextractor\")\n",
20
+ "model_keywords = AutoModelForSeq2SeqLM.from_pretrained(\"transformer3/H2-keywordextractor\")\n",
21
+ "\n",
22
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
23
+ "# Load the fine-tuned model and tokenizer\n",
24
+ "new_model = AutoModelForSequenceClassification.from_pretrained('roberta-rating')\n",
25
+ "new_tokenizer = AutoTokenizer.from_pretrained('roberta-rating')\n",
26
+ "\n",
27
+ "\n",
28
+ "# Create a classification pipeline\n",
29
+ "classifier = TextClassificationPipeline(model=new_model, tokenizer=new_tokenizer, device=device)\n",
30
+ "\n",
31
+ "# Add label mapping for sentiment analysis\n",
32
+ "label_mapping = {1: '1/5', 2: '2/5', 3: '3/5', 4: '4/5', 5: '5/5'}\n",
33
+ "\n",
34
+ "def parse_xl(file_path):\n",
35
+ " cells = []\n",
36
+ "\n",
37
+ " workbook = load_workbook(filename=file_path)\n",
38
+ " for sheet in workbook.worksheets:\n",
39
+ " for row in sheet.iter_rows():\n",
40
+ " for cell in row:\n",
41
+ " if cell.value != None:\n",
42
+ " cells.append(cell.value)\n",
43
+ "\n",
44
+ " return cells\n",
45
+ "\n",
46
+ "def evaluate(file):\n",
47
+ " reviews = parse_xl(file)\n",
48
+ " ratings = []\n",
49
+ " text = \"\"\n",
50
+ "\n",
51
+ " for review in reviews:\n",
52
+ " ratings.append(int(classifier(review)[0]['label'].split('_')[1]))\n",
53
+ " text += review\n",
54
+ " text += \" \"\n",
55
+ " \n",
56
+ " inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
57
+ " summary_ids = model.generate(inputs[\"input_ids\"], num_beams=2, min_length=50, max_length=1000)\n",
58
+ " summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]\n",
59
+ "\n",
60
+ " inputs_keywords = tokenizer_keywords([text], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
61
+ " summary_ids_keywords = model_keywords.generate(inputs_keywords[\"input_ids\"], num_beams=2, min_length=0, max_length=100)\n",
62
+ " keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] \n",
63
+ "\n",
64
+ " return round(mean(ratings), 2), summary, keywords\n",
65
+ "\n",
66
+ "iface = gr.Interface(\n",
67
+ " fn=evaluate,\n",
68
+ " inputs=gr.File(label=\"Reviews\", file_types=[\".xlsx\", \".xlsm\", \".xltx\", \".xltm\"]),\n",
69
+ " outputs=[gr.Textbox(label=\"Rating\"), gr.Textbox(label=\"Summary\"), gr.Textbox(label=\"Keywords\")],\n",
70
+ " title='Summarize Reviews',\n",
71
+ " description=\"Evaluate and summarize a collection of reviews. Reviews are submitted as an Excel file, where each review is in its own cell.\"\n",
72
+ ")\n",
73
+ "\n",
74
+ "iface.launch(share=True)"
75
+ ]
76
+ }
77
+ ],
78
+ "metadata": {
79
+ "kernelspec": {
80
+ "display_name": "SolutionsInPR",
81
+ "language": "python",
82
+ "name": "python3"
83
+ },
84
+ "language_info": {
85
+ "codemirror_mode": {
86
+ "name": "ipython",
87
+ "version": 3
88
+ },
89
+ "file_extension": ".py",
90
+ "mimetype": "text/x-python",
91
+ "name": "python",
92
+ "nbconvert_exporter": "python",
93
+ "pygments_lexer": "ipython3",
94
+ "version": "3.12.3"
95
+ }
96
+ },
97
+ "nbformat": 4,
98
+ "nbformat_minor": 2
99
+ }