Spaces:

alaamostafa
/

Microsoft-Phi-2-deploy-CPU

Sleeping

App Files Community

alaamostafa committed on Mar 5

Commit

4f06d80

verified ·

1 Parent(s): afba7e5

Create app.py

Browse files

Files changed (1) hide show

app.py +184 -0

app.py ADDED Viewed

	@@ -0,0 +1,184 @@

+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+import os
+# Set up model parameters
+MODEL_ID = "alaamostafa/Microsoft-Phi-2"
+BASE_MODEL_ID = "microsoft/phi-2"
+# Force CPU usage and set up offload directory
+device = "cpu"
+print(f"Using device: {device}")
+os.makedirs("offload_dir", exist_ok=True)
+# Disable bitsandbytes for CPU usage
+os.environ["BITSANDBYTES_NOWELCOME"] = "1"
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
+# Load base model with simple CPU configuration, avoiding device_map and 8-bit loading
+print("Loading base model...")
+try:
+    base_model = AutoModelForCausalLM.from_pretrained(
+        BASE_MODEL_ID,
+        torch_dtype=torch.float32,  # Use float32 for CPU
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,     # Optimize for lower memory usage
+        offload_folder="offload_dir" # Set offload directory
+    )
+    # Load the fine-tuned adapter
+    print(f"Loading adapter from {MODEL_ID}...")
+    model = PeftModel.from_pretrained(
+        base_model,
+        MODEL_ID,
+        offload_folder="offload_dir"
+    )
+    print("Model loaded successfully!")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    # Create a placeholder error message for the UI
+    error_message = f"Failed to load model: {str(e)}\n\nThis Space may need a GPU to run properly."
+def generate_text(
+    prompt,
+    max_length=256,  # Reduced for CPU
+    temperature=0.7,
+    top_p=0.9,
+    top_k=40,
+    repetition_penalty=1.1
+):
+    """Generate text based on prompt with the fine-tuned model"""
+    try:
+        # Prepare input
+        inputs = tokenizer(prompt, return_tensors="pt")
+        # Generate text
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_length=max_length,
+                temperature=temperature,
+                top_p=top_p,
+                top_k=top_k,
+                repetition_penalty=repetition_penalty,
+                do_sample=temperature > 0,
+                pad_token_id=tokenizer.eos_token_id
+            )
+        # Decode and return the generated text
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return generated_text
+    except Exception as e:
+        return f"Error generating text: {str(e)}"
+# Static content for the Gradio interface: custom CSS, page title and intro text
+# css is passed to gr.Blocks; it caps the container width and lets code inside prose wrap
+css = """
+.gradio-container {max-width: 800px !important}
+.gr-prose code {white-space: pre-wrap !important}
+"""
+# title is used by both the full interface and the error fallback interface
+title = "Neuroscience Fine-tuned Phi-2 Model (CPU Version)"
+# description is Markdown: a CPU-speed warning, usage tips and example prompts
+description = """
+This is a fine-tuned version of Microsoft's Phi-2 model, adapted specifically for neuroscience domain content.
+⚠️ **Note: This model is running on CPU which means responses will be slower.** ⚠️
+For best performance:
+- Keep your prompts focused and clear
+- Use shorter maximum length settings (128-256)
+- Be patient as generation can take 30+ seconds
+**Example prompts:**
+- Recent advances in neuroimaging suggest that
+- The role of dopamine in learning and memory involves
+- Explain the concept of neuroplasticity in simple terms
+- What are the key differences between neurons and glial cells?
+"""
+# Check if model loaded successfully
+if 'error_message' in locals():
+    # Simple error interface
+    demo = gr.Interface(
+        fn=lambda x: error_message,
+        inputs=gr.Textbox(label="This model cannot be loaded on CPU"),
+        outputs=gr.Textbox(),
+        title=title,
+        description=description
+    )
+else:
+    # Full interface
+    with gr.Blocks(css=css) as demo:
+        gr.Markdown(f"# {title}")
+        gr.Markdown(description)
+        with gr.Row():
+            with gr.Column():
+                prompt = gr.Textbox(
+                    label="Enter your prompt",
+                    placeholder="Recent advances in neuroscience suggest that",
+                    lines=5
+                )
+                with gr.Row():
+                    submit_btn = gr.Button("Generate", variant="primary")
+                    clear_btn = gr.Button("Clear")
+                with gr.Accordion("Advanced Options", open=False):
+                    max_length = gr.Slider(
+                        minimum=64, maximum=512, value=256, step=64,
+                        label="Maximum Length (lower is faster on CPU)"
+                    )
+                    temperature = gr.Slider(
+                        minimum=0.0, maximum=1.5, value=0.7, step=0.1,
+                        label="Temperature (0 = deterministic, 0.7 = creative, 1.5 = random)"
+                    )
+                    top_p = gr.Slider(
+                        minimum=0.1, maximum=1.0, value=0.9, step=0.1,
+                        label="Top-p (nucleus sampling)"
+                    )
+                    top_k = gr.Slider(
+                        minimum=1, maximum=100, value=40, step=1,
+                        label="Top-k"
+                    )
+                    repetition_penalty = gr.Slider(
+                        minimum=1.0, maximum=2.0, value=1.1, step=0.1,
+                        label="Repetition Penalty"
+                    )
+            with gr.Column():
+                output = gr.Textbox(
+                    label="Generated Text",
+                    lines=20
+                )
+        # Set up event handlers
+        submit_btn.click(
+            fn=generate_text,
+            inputs=[prompt, max_length, temperature, top_p, top_k, repetition_penalty],
+            outputs=output
+        )
+        clear_btn.click(
+            fn=lambda: ("", None),
+            inputs=None,
+            outputs=[prompt, output]
+        )
+        # Example prompts
+        examples = [
+            ["Recent advances in neuroimaging suggest that"],
+            ["The role of dopamine in learning and memory involves"],
+            ["Explain the concept of neuroplasticity in simple terms"],
+            ["What are the key differences between neurons and glial cells?"]
+        ]
+        gr.Examples(
+            examples=examples,
+            inputs=prompt
+        )
+# Launch the app
+demo.launch()