hosseinhimself committed
Commit 393b5f6 · verified · 1 Parent(s): db77b63

Update app.py

Files changed (1):
  app.py  +53 -69
app.py CHANGED
@@ -1,76 +1,60 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel, PeftConfig
+import spaces
+import time
 
-# Define device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-# Load the model and tokenizer
 model_name = "hosseinhimself/ISANG-v1.0-8B"
-tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-
-def chat_with_model(history, user_input):
-    """
-    Generate a response using the model, considering the last two interactions.
-
-    Parameters:
-    history (list of tuples): Conversation history as a list of (user, bot) pairs.
-    user_input (str): The latest user input.
-
-    Returns:
-    history (list of tuples): Updated conversation history.
-    """
-    # Use the last two interactions for context
-    context = ""
-    for user_message, bot_message in history[-2:]:
-        context += f"User: {user_message}\nBot: {bot_message}\n"
-
-    # Add the current user input
-    context += f"User: {user_input}\nBot:"
-
-    # Tokenize and generate a response
-    inputs = tokenizer(context, return_tensors="pt", truncation=True).to(device)
-    output = model.generate(inputs.input_ids, max_new_tokens=100)
-    bot_response = tokenizer.decode(output[0], skip_special_tokens=True)
-
-    # Extract only the bot's new response (to avoid repeating context)
-    bot_response = bot_response[len(context):].strip()
-
-    # Update the conversation history
-    history.append((user_input, bot_response))
-
-    return history
-
-def gradio_format(history):
-    """
-    Format the history for Gradio ChatInterface.
-
-    Parameters:
-    history (list of tuples): Conversation history as a list of (user, bot) pairs.
-
-    Returns:
-    List of dictionaries compatible with Gradio ChatInterface.
-    """
-    return [[user, bot] for user, bot in history]
-
-# Initialize empty history
-history = []
-
-def interface_function(user_input):
-    global history
-    history = chat_with_model(history, user_input)
-    return gradio_format(history)
-
-# Create Gradio interface
-chatbot = gr.ChatInterface(
-    fn=interface_function,
-    inputs=[gr.Textbox(lines=2, label="Your Input")],
-    outputs=[gr.Chatbot(label="Chat History")],
-    title="Persian Chatbot",
-    description="A chatbot that translates or responds to Persian prompts using ISANG-v1.0-8B model."
+base_model_name = "unsloth/Meta-Llama-3.1-8B"
+
+# Load tokenizer globally
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+@spaces.GPU
+def load_model():
+    try:
+        # Load the base model
+        base_model = AutoModelForCausalLM.from_pretrained(
+            base_model_name,
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
+        )
+        # Load the PEFT model
+        model = PeftModel.from_pretrained(base_model, model_name)
+        print(f"Model loaded successfully. Using device: {model.device}")
+        return model
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        raise
+
+@spaces.GPU
+def generate_text(prompt):
+    model = load_model()
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens=200, num_return_sequences=1, temperature=0.7)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+
+gradio_app = gr.Interface(
+    generate_text,
+    inputs=gr.Textbox(label="Enter your message", lines=3),
+    outputs=gr.Textbox(label="Chatbot Response"),
+    title="ISANG Chatbot",
+    description=f"""This is a simple chatbot powered by the ISANG model. It is fine-tuned from the {base_model_name} model.
+    Enter your message and see how the chatbot responds!""",
+    examples=[
+        ["سلام، چطوری؟"],  # "Hi, how are you?"
+        ["برام یه داستان تعریف کن"],  # "Tell me a story"
+        ["بهترین کتابی که خوندی چی بوده؟"],  # "What's the best book you've read?"
+        ["توی اوقات فراغتت چی کار می‌کنی؟"],  # "What do you do in your free time?"
+        ["نظرت درباره هوش مصنوعی چیه؟"]  # "What do you think about artificial intelligence?"
+    ]
 )
 
-# Launch the app
 if __name__ == "__main__":
-    chatbot.launch()
+    gradio_app.launch()
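
Note on the new loading path: the commit stops loading ISANG-v1.0-8B as a standalone checkpoint and instead loads it as a PEFT adapter on top of unsloth/Meta-Llama-3.1-8B, with GPU work gated behind ZeroGPU's @spaces.GPU decorator. Below is a minimal local sketch of the same pattern, not part of the committed app.py, assuming the same model IDs and a CUDA machine. The prompt-slicing step is an addition here: model.generate returns the prompt plus the completion, and temperature only takes effect when do_sample=True is passed.

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_name = "unsloth/Meta-Llama-3.1-8B"
adapter_name = "hosseinhimself/ISANG-v1.0-8B"

tokenizer = AutoTokenizer.from_pretrained(adapter_name)
base = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Apply the PEFT adapter weights on top of the base model
model = PeftModel.from_pretrained(base, adapter_name)

prompt = "سلام، چطوری؟"  # "Hi, how are you?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,   # temperature is only applied when sampling
        temperature=0.7,
    )
# Decode only the newly generated tokens, not the echoed prompt
new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))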
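
Since the UI also changes from gr.ChatInterface to a single-turn gr.Interface, the Space exposes one /predict endpoint. A minimal sketch of calling it with gradio_client follows; the Space ID below is an assumption based on the repo name, not confirmed by this commit.

from gradio_client import Client

client = Client("hosseinhimself/ISANG-v1.0-8B")  # assumed Space ID
result = client.predict("سلام، چطوری؟", api_name="/predict")  # "Hi, how are you?"
print(result)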