cris177 committed
Commit ab7670e · verified · 1 Parent(s): 772cf77

Update app.py

Files changed (1): app.py +19 -10
app.py CHANGED
@@ -1,10 +1,21 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
 import gradio as gr
 
-model = AutoModelForCausalLM.from_pretrained("cris177/Qwen2-Simple-Arguments")
-tokenizer = AutoTokenizer.from_pretrained("cris177/Qwen2-Simple-Arguments")
+## Download the GGUF model
+model_name = "cris177/Qwen2-Simple-Arguments"
+model_file = "Qwen2_arguments.Q4_K_M.gguf"  # the specific file used here: a 4-bit quant; other quantization levels are available in the model repo
+model_path = hf_hub_download(model_name, filename=model_file)
+
+## Instantiate model from downloaded file
+llm = Llama(
+    model_path=model_path,
+    n_ctx=2000,      # Context length to use
+    n_threads=2,     # Number of CPU threads to use
+    n_gpu_layers=0   # Number of model layers to offload to GPU
+)
+
 
-argument = "If it's wednesday it's cold, and it's cold, therefore it's wednesday."
 def analyze_argument(argument):
     instruction = 'Based on the following argument, identify the following elements: premises, conclusion, propositions, type of argument, negation of propositions and validity.'
     alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
@@ -17,19 +28,17 @@ def analyze_argument(argument):
 
     ### Response:"""
     prompt = alpaca_prompt.format(instruction, argument)
-    input_ids = tokenizer(prompt, return_tensors="pt")
 
-    outputs = model.generate(**input_ids, max_length=1000, num_return_sequences=1)
-    output = tokenizer.decode(outputs[0])
-    # remove prompt from output
-    output = output.split("### Response:")[1].strip().split("<|endoftext|>")[0]
+    output = llm(prompt, max_tokens=1000)['choices'][0]['text'].strip()
     return output
 
 description = """This tool analyzes simple arguments, that is, arguments composed of at most two propositions.
 
 It applies the fine-tuned LLM from https://huggingface.co/cris177/Qwen2-Simple-Arguments
 
-It requires only 6 GB of RAM, and runs on just 2 vCPUs (which causes it to run somewhat slowly in this demo).
+For faster inference we use the 4-bit quantized model https://huggingface.co/cris177/Qwen2-Simple-Arguments/resolve/main/Qwen2_arguments.Q4_K_M.gguf.
+
+It requires only 3 GB of RAM and runs on just 2 vCPUs (which causes it to run somewhat slowly in this demo).
 """
 
 gr.Interface(analyze_argument, inputs="text", outputs="text",
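
For readers who want to try the new llama-cpp-python path outside the Space, here is a minimal standalone sketch. It assumes llama-cpp-python and huggingface_hub are installed; the full Alpaca template (the ### Instruction: / ### Input: headers) and the stop argument are assumptions, since the diff shows only the template's opening line and its ### Response: tail. The sample argument is the one removed from the old app.py.

    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    # Download the same 4-bit GGUF quant the Space uses
    model_path = hf_hub_download(
        "cris177/Qwen2-Simple-Arguments",
        filename="Qwen2_arguments.Q4_K_M.gguf",
    )

    # CPU-only instantiation, mirroring the Space's 2-vCPU configuration
    llm = Llama(model_path=model_path, n_ctx=2000, n_threads=2, n_gpu_layers=0)

    # Standard Alpaca template (assumed); the diff shows only its first and last lines
    prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    ### Instruction:
    Based on the following argument, identify the following elements: premises, conclusion, propositions, type of argument, negation of propositions and validity.

    ### Input:
    If it's wednesday it's cold, and it's cold, therefore it's wednesday.

    ### Response:"""

    # max_tokens bounds the completion; stop is an assumed guard against run-on output
    result = llm(prompt, max_tokens=1000, stop=["### Instruction:"])
    print(result["choices"][0]["text"].strip())

Setting n_gpu_layers=0 keeps inference entirely on the CPU, matching the 2-vCPU hardware mentioned in the description, and the Q4_K_M quantization is what brings peak memory down to roughly the 3 GB quoted there.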