Update app.py
app.py CHANGED
@@ -7,16 +7,16 @@ from transformers import AutoModelForCausalLM
 
 config = PeftConfig.from_pretrained("Ngadou/falcon-7b-scam-buster")
 model = AutoModelForCausalLM.from_pretrained("vilsonrodrigues/falcon-7b-instruct-sharded", trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained("vilsonrodrigues/falcon-7b-instruct-sharded")
 model = PeftModel.from_pretrained(model, "Ngadou/falcon-7b-scam-buster").to("cuda")
-tokenizer = AutoTokenizer.from_pretrained("Ngadou/falcon-7b-scam-buster")
 
 def generate(chat):
 
-
-
-
-
-
+    input_text = chat + "\nIs this conversation a scam or not and why?"
+
+
+    encoding = tokenizer(input_text, return_tensors="pt").to("cuda")
+    output = model.generate(
         input_ids=encoding.input_ids,
         attention_mask=encoding.attention_mask,
         max_new_tokens=100,
@@ -25,50 +25,52 @@ def generate(chat):
         eos_token_id=tokenizer.eos_token_id,
         top_k = 0
     )
+
+    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+    output_text = output_text.replace(example_text, "").lstrip("\n")
+
+    print("\nAnswer:")
+    print(output_text)
+    return output_text
+
 
-
-
-
-
-
-
-def is_scam(instruction):
-    max_new_tokens=128
-    temperature=0.1
-    top_p=0.75
-    top_k=40
-    num_beams=4
+# def is_scam(instruction):
+#     max_new_tokens=128
+#     temperature=0.1
+#     top_p=0.75
+#     top_k=40
+#     num_beams=4
 
-    instruction = instruction + ".\nIs this conversation a scam or not and why?"
-    prompt = instruction + "\n### Solution:\n"
-    inputs = tokenizer(prompt, return_tensors="pt")
-    input_ids = inputs["input_ids"].to("cuda")
-    attention_mask = inputs["attention_mask"].to("cuda")
-    generation_config = GenerationConfig(
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        num_beams=num_beams,
-    )
-    with torch.no_grad():
-        generation_output = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            generation_config=generation_config,
-            return_dict_in_generate=True,
-            output_scores=True,
-            max_new_tokens=max_new_tokens,
-            early_stopping=True
-        )
-    s = generation_output.sequences[0]
-    output = tokenizer.decode(s)
-    results = output.split("### Solution:")[1].lstrip("\n").split('\n')
+#     instruction = instruction + ".\nIs this conversation a scam or not and why?"
+#     prompt = instruction + "\n### Solution:\n"
+#     inputs = tokenizer(prompt, return_tensors="pt")
+#     input_ids = inputs["input_ids"].to("cuda")
+#     attention_mask = inputs["attention_mask"].to("cuda")
+#     generation_config = GenerationConfig(
+#         temperature=temperature,
+#         top_p=top_p,
+#         top_k=top_k,
+#         num_beams=num_beams,
+#     )
+#     with torch.no_grad():
+#         generation_output = model.generate(
+#             input_ids=input_ids,
+#             attention_mask=attention_mask,
+#             generation_config=generation_config,
+#             return_dict_in_generate=True,
+#             output_scores=True,
+#             max_new_tokens=max_new_tokens,
+#             early_stopping=True
+#         )
+#     s = generation_output.sequences[0]
+#     output = tokenizer.decode(s)
+#     results = output.split("### Solution:")[1].lstrip("\n").split('\n')
 
-    # The format of the output should be adjusted according to your model's output
-    classification = results # Assumes first line is the classification
-    #reason = results[1] if len(results) > 1 else "" # Assumes the rest is the reason
+# # The format of the output should be adjusted according to your model's output
+# classification = results # Assumes first line is the classification
+# #reason = results[1] if len(results) > 1 else "" # Assumes the rest is the reason
 
-    return classification #, reason
+# return classification #, reason
 
 
 # Define the Gradio interface
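A note on the new generate() body: the decoded output is cleaned with output_text.replace(example_text, ""), but example_text does not appear anywhere in the shown hunks; only input_text is defined in the function. Unless example_text is set in the file's unshown opening lines, this call raises a NameError the first time generate() runs. Below is a minimal sketch of the likely intent, stripping the echoed prompt instead. This is a hypothetical fix, not part of the commit, and the generation arguments on the elided lines 23-24 are omitted.

    def generate(chat):
        # Append the classification question to the incoming transcript.
        input_text = chat + "\nIs this conversation a scam or not and why?"

        encoding = tokenizer(input_text, return_tensors="pt").to("cuda")
        output = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            max_new_tokens=100,
            eos_token_id=tokenizer.eos_token_id,
            top_k=0,
        )

        output_text = tokenizer.decode(output[0], skip_special_tokens=True)
        # Strip the echoed prompt (input_text), not the undefined example_text.
        output_text = output_text.replace(input_text, "").lstrip("\n")
        return output_text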
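The diff ends at the comment introducing the Gradio wiring, which this commit leaves unchanged and which is not shown. For orientation, a typical shape for that section is sketched below; the widget labels and title are assumptions, not confirmed by the commit.

    import gradio as gr

    # Hypothetical wiring; the actual interface definition sits below the shown hunks.
    demo = gr.Interface(
        fn=generate,  # the function rewritten in this commit
        inputs=gr.Textbox(lines=8, label="Chat transcript"),
        outputs=gr.Textbox(label="Scam verdict and reasoning"),
        title="Falcon-7B Scam Buster",
    )

    demo.launch()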