Spaces:

Oliver12315
/

Brand_Tone_of_Voice_Online_Demo

Runtime error

App Files Files Community

Oliver12315 committed on Dec 24, 2023

Commit

10fa1e9

1 Parent(s): 621f0bd

Upload core files

Browse files

Files changed (6) hide show

.gitignore +3 -0
Prediction.py +83 -0
README.md +5 -5
app.py +122 -4
convert.py +30 -0
requirements.txt +5 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+/output/*
+.vscode
+__pycache__

Prediction.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import pandas as pd
+from tqdm.auto import tqdm
+import torch
+from transformers import BertTokenizerFast as BertTokenizer, BertForSequenceClassification
+import os
+import glob
+RANDOM_SEED = 42  # seed constant; not referenced by any code visible in this file
+pd.RANDOM_SEED = 42  # NOTE(review): this only sets an ad-hoc attribute on the pandas module object; presumably it has no seeding effect -- confirm and remove
+LABEL_COLUMNS = ["Assertive Tone", "Conversational Tone", "Emotional Tone", "Informative Tone", "None"]  # output column names; predict_csv assigns class probabilities to them in this order
+@torch.no_grad()
+def predict_csv(data, text_col, tokenizer, model, device, text_bs=16, max_token_len=128):
+    predictions = []
+    post = data[text_col]
+    num_text = len(post)
+    generator = range(0, num_text, text_bs)
+    for i in tqdm(generator, total=len(generator), desc="Processing..."):
+      texts = post[i: min(num_text, i+text_bs)].tolist()
+      encoding = tokenizer(
+          texts,
+          add_special_tokens=True,
+          max_length=max_token_len,
+          return_token_type_ids=False,
+          padding="max_length",
+          truncation=True,
+          return_attention_mask=True,
+          return_tensors='pt',
+        )
+      logits = model(
+          encoding["input_ids"].to(device),
+          encoding["attention_mask"].to(device),
+          return_dict=True
+      ).logits
+      prediction = torch.softmax(logits, dim=1)
+      predictions.append(prediction.detach().cpu())
+    final_pred = torch.cat(predictions, dim=0)
+    y_inten = final_pred.numpy().T
+    for i in range(len(LABEL_COLUMNS)):
+      data[LABEL_COLUMNS[i]] = [round(i, 8) for i in y_inten[i].tolist()]
+    return data
+@torch.no_grad()
+def predict_single(sentence, tokenizer, model, device, max_token_len=128):
+    encoding = tokenizer(
+        sentence,
+        add_special_tokens=True,
+        max_length=max_token_len,
+        return_token_type_ids=False,
+        padding="max_length",
+        truncation=True,
+        return_attention_mask=True,
+        return_tensors='pt',
+      )
+    logits = model(
+        encoding["input_ids"].to(device),
+        encoding["attention_mask"].to(device),
+        return_dict=True
+    ).logits
+    prediction = torch.softmax(logits, dim=1)
+    y_inten = prediction.flatten().cpu().numpy().T.tolist()
+    y_inten = [round(i, 8) for i in y_inten]
+    return y_inten
+if __name__ == "__main__":
+  Data = pd.read_csv("assets/Kickstarter_sentence_level_5000.csv")
+  Data = Data[:20]
+  device = torch.device('cpu')
+  # Load model directly
+  tokenizer = BertTokenizer.from_pretrained("Oliver12315/Brand_Tone_of_Voice")
+  model = BertForSequenceClassification.from_pretrained("Oliver12315/Brand_Tone_of_Voice")
+  model = model.to(device)
+  fk_doc_result = predict_csv(Data,"content", tokenizer, model, device)
+  single_response = predict_single("Games of the imagination teach us actions have consequences in a realm that can be reset.", tokenizer, model, device)
+  fk_doc_result.to_csv(f"output/prediction_Brand_Tone_of_Voice.csv")

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: Brand Tone Of Voice Online Demo
-emoji: 🐠
-colorFrom: gray
-colorTo: pink
 sdk: gradio
-sdk_version: 4.12.0
 app_file: app.py
 pinned: false
 license: mit

 ---
+title: Murphy
+emoji: 📊
+colorFrom: purple
+colorTo: red
 sdk: gradio
+sdk_version: 4.10.0
 app_file: app.py
 pinned: false
 license: mit

app.py CHANGED Viewed

@@ -1,7 +1,125 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

 import gradio as gr
+import pandas as pd
+import matplotlib.pyplot as plt
+from Prediction import *
+import os
+from datetime import datetime
+examples = []
+if os.path.exists("assets/examples.txt"):
+    with open("assets/examples.txt", "r", encoding="utf8") as file:
+        for sentence in file:
+            sentence = sentence.strip()
+            examples.append(sentence)
+else:
+    examples = [
+        "Games of the imagination teach us actions have consequences in a realm that can be reset.",
+        "But New Jersey farmers are retiring and all over the state, development continues to push out dwindling farmland.",
+        "He also is the Head Designer of The Design Trust so-to-speak, besides his regular job ..."
+        ]
+device = torch.device('cpu')
+tokenizer = BertTokenizer.from_pretrained("Oliver12315/Brand_Tone_of_Voice")
+model = BertForSequenceClassification.from_pretrained("Oliver12315/Brand_Tone_of_Voice")
+model = model.to(device)
+def single_sentence(sentence):
+    predictions = predict_single(sentence, tokenizer, model, device)
+    predictions.sort(reverse=True)
+    return list(zip(LABEL_COLUMNS, predictions))
+def csv_process(csv_file, attr="content"):
+    current_time = datetime.now()
+    formatted_time = current_time.strftime("%Y_%m_%d_%H_%M_%S")
+    data = pd.read_csv(csv_file.name)
+    data = data.reset_index()
+    os.makedirs('output', exist_ok=True)
+    outputs = []
+    predictions = predict_csv(data, attr, tokenizer, model, device)
+    output_path = f"output/prediction_Brand_Tone_of_Voice_{formatted_time}.csv"
+    predictions.to_csv(output_path)
+    outputs.append(output_path)
+    return outputs
+my_theme = gr.Theme.from_hub("JohnSmith9982/small_and_pretty")
+with gr.Blocks(theme=my_theme, title='Brand_Tone_of_Voice_demo') as demo:
+    gr.HTML(
+        """
+        <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+        <a href="https://github.com/xxx" style="margin-right: 20px; text-decoration: none; display: flex; align-items: center;">
+        </a>
+        <div>
+            <h1 >Place the title of the paper here</h1>
+            <h5 style="margin: 0;">If you like our project, please give us a star ✨ on Github for the latest update.</h5>
+            <div style="display: flex; justify-content: center; align-items: center; text-align: center;>
+                <a href="https://arxiv.org/abs/xx.xx"><img src="https://img.shields.io/badge/Arxiv-xx.xx-red"></a>
+                <a href='https://huggingface.co/spaces/Oliver12315/Brand_Tone_of_Voice_demo'><img src='https://img.shields.io/badge/Project_Page-Oliver12315/Brand_Tone_of_Voice_demo' alt='Project Page'></a>
+                <a href='https://github.com'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
+            </div>
+        </div>
+        </div>
+        """)
+    with gr.Tab("Single Sentence"):
+        with gr.Row():
+            tbox_input = gr.Textbox(label="Input",
+                                    info="Please input a sentence here:")
+            gr.Markdown("""
+                # Detailed information about our model:
+                ...
+                """)
+        tab_output = gr.DataFrame(label='Predictions:',
+                                  headers=["Label", "Probability"],
+                                  datatype=["str", "number"],
+                                  interactive=False)
+        with gr.Row():
+            button_ss = gr.Button("Submit", variant="primary")
+            button_ss.click(fn=single_sentence, inputs=[tbox_input], outputs=[tab_output])
+            gr.ClearButton([tbox_input, tab_output])
+        gr.Examples(
+            examples=examples,
+            inputs=tbox_input,
+            examples_per_page=len(examples)
+        )
+    with gr.Tab("Csv File"):
+        with gr.Row():
+            csv_input = gr.File(label="CSV File:",
+                                file_types=['.csv'],
+                                file_count="single"
+                                )
+            csv_output = gr.File(label="Predictions:")
+        with gr.Row():
+            button = gr.Button("Submit", variant="primary")
+            button.click(fn=csv_process, inputs=[csv_input], outputs=[csv_output])
+            gr.ClearButton([csv_input, csv_output])
+        gr.Markdown("## Examples \n The incoming CSV must include the ``content`` field, which represents the text that needs to be predicted!")
+        gr.DataFrame(label='Csv input format:',
+                    value=[[i, examples[i]] for i in range(len(examples))],
+                    headers=["index", "content"],
+                    datatype=["number","str"],
+                    interactive=False
+                    )
+    with gr.Tab("Readme"):
+        gr.Markdown(
+            """
+            # Paper Name
+            # Authors
+            + First author
+            + Corresponding author
+            # Detailed Information
+            ...
+            """
+        )
+demo.launch()

convert.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import torch
+import glob
+import os
+from transformers import BertTokenizerFast as BertTokenizer, BertForSequenceClassification
+os.environ['https_proxy'] = "127.0.0.1:1081"
+LABEL_COLUMNS = ["Assertive Tone", "Conversational Tone", "Emotional Tone", "Informative Tone", "None"]
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=5)
+id2label = {i:label for i,label in enumerate(LABEL_COLUMNS)}
+label2id = {label:i for i,label in enumerate(LABEL_COLUMNS)}
+for ckpt in glob.glob('checkpoints/*.ckpt'):
+    base_name = os.path.basename(ckpt)
+    # 去除文件后缀
+    model_name = os.path.splitext(base_name)[0]
+    params = torch.load(ckpt, map_location="cpu")['state_dict']
+    msg = model.load_state_dict(params, strict=True)
+    path = f'models/{model_name}'
+    os.makedirs(path, exist_ok=True)
+    torch.save(model.state_dict(), f'{path}/pytorch_model.bin')
+    config = model.config
+    config.architectures = ['BertForSequenceClassification']
+    config.label2id = label2id
+    config.id2label = id2label
+    model.config.to_json_file(f'{path}/config.json')
+    tokenizer.save_vocabulary(path)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+torch
+transformers
+tqdm
+pandas
+datetime