erikbeltran committed
Commit 0dc3013 · verified · 1 Parent(s): 3acd52d

Update app.py

Files changed (1)
  1. app.py +39 -22
app.py CHANGED
@@ -1,13 +1,18 @@
-import spaces
 import gradio as gr
-from huggingface_hub import InferenceClient
-from transformers import AutoTokenizer
+import spaces
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
 # Initialize model and tokenizer
-model_name = "erikbeltran/pydiff"
-client = InferenceClient(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+MODEL_ID = "erikbeltran/pydiff"
+GGUF_FILE = "unsloth.Q4_K_M.gguf"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, gguf_file=GGUF_FILE)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, gguf_file=GGUF_FILE)
+
+# Move model to GPU if available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
 
 def format_diff_response(response):
     """Format the response to look like a diff output"""
@@ -21,28 +26,40 @@ def format_diff_response(response):
     else:
         formatted.append(line)
     return '<br>'.join(formatted)
-
-@spaces.GPU
-def respond(request, file_content, system_message, max_tokens, temperature, top_p):
-    messages = [
-        {"role": "system", "content": system_message},
-        {"role": "user", "content": f"""<request>{request}</request>
+
+def create_prompt(request, file_content, system_message):
+    return f"""<system>{system_message}</system>
+<request>{request}</request>
 <file>
 {file_content}
-</file>"""}
-    ]
+</file>"""
+
+@spaces.GPU
+def respond(request, file_content, system_message, max_tokens, temperature, top_p):
+    prompt = create_prompt(request, file_content, system_message)
+
+    # Tokenize input
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
+    # Generate response with streaming
     response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
+    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+
+    generation_kwargs = dict(
+        inputs=inputs["input_ids"],
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        # Format as diff and yield
+        streamer=streamer,
+    )
+
+    # Start generation in a separate thread
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+
+    # Yield formatted responses as they're generated
+    for new_text in streamer:
+        response += new_text
        yield format_diff_response(response)
 
 # Create the Gradio interface
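
Note: as committed, the new respond() references TextIteratorStreamer and Thread, but neither name is imported in the updated file, so the first call would raise a NameError. A minimal fix (a sketch of a follow-up change, not part of commit 0dc3013) is to add the two missing imports at the top of app.py:

# Hypothetical follow-up fix for the missing imports (not in commit 0dc3013)
from threading import Thread                    # runs model.generate() in a background thread
from transformers import TextIteratorStreamer   # iterator that yields decoded text as tokens arrive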
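
The hunk ends at the "# Create the Gradio interface" comment, so the interface itself is outside this diff's context. Because respond() is a generator, Gradio re-renders the output on every yield, which is what produces the streaming effect. A minimal sketch of how the interface could consume it (component choices, labels, and default values here are assumptions, not taken from the commit):

# Hypothetical interface wiring (assumed, not shown in this diff)
demo = gr.Interface(
    fn=respond,  # generator function: each yield updates the output component
    inputs=[
        gr.Textbox(label="Request"),
        gr.Textbox(label="File content", lines=12),
        gr.Textbox(label="System message"),
        gr.Slider(1, 2048, value=512, step=1, label="Max tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    outputs=gr.HTML(label="Diff output"),  # format_diff_response() returns <br>-joined HTML
)

demo.launch()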