yehtutmaung committed on
Commit e1ec4a7 · verified · 1 Parent(s): b01bbbc

Create app.py

Files changed (1):
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
import streamlit as st
from huggingface_hub import InferenceClient
import os

# Load tokenizer and model (MiniCPM-V-2 needs trust_remote_code=True for both)
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
model.eval()

# Set device and dtype for the model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device=device, dtype=torch.float16 if device == 'cuda' else torch.float32)

# Retrieve the Hugging Face API key from the environment
api_key = os.getenv("HF_API_KEY")

# Initialize the Hugging Face Inference client with the API key
client = InferenceClient(api_key=api_key)

# Streamlit UI setup
st.title("Image Questioning and Content Generation App")
st.write("Upload an image and ask a question. The model will respond with a description, and you can generate a song or story based on the response.")

# Upload an image
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
if uploaded_image:
    image = Image.open(uploaded_image).convert('RGB')
    st.image(image, caption="Uploaded Image", use_column_width=True)

# Text input for the question
question = st.text_input("Ask a question about the image")
if question and uploaded_image:
    msgs = [{'role': 'user', 'content': question}]

    # Model's response to the image question
    with st.spinner("Processing..."):
        res, context, _ = model.chat(
            image=image,
            msgs=msgs,
            context=None,
            tokenizer=tokenizer,
            sampling=True,
            temperature=0.7
        )

    st.write("Model's response:", res)

    # Options for generating content based on the response
    option = st.selectbox("Generate content based on the response", ["Choose...", "Write a Song", "Write a Story"])

    if option != "Choose...":
        # Create a message based on the user's choice
        if option == "Write a Song":
            messages = [{"role": "user", "content": f"Write a song about the following: {res}"}]
        elif option == "Write a Story":
            messages = [{"role": "user", "content": f"Write a story about the following: {res}"}]

        # Stream the content generation
        st.write(f"Generating {option.lower()}...")

        stream = client.chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )

        # Update a single placeholder as chunks arrive instead of writing a new
        # element per chunk; delta.content can be None on some chunks.
        generated_text = ""
        placeholder = st.empty()
        for chunk in stream:
            generated_text += chunk.choices[0].delta.content or ""
            placeholder.write(generated_text)
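
The file above expects torch, Pillow, transformers, streamlit, and a recent huggingface_hub release (one that exposes the OpenAI-compatible client.chat.completions.create interface used for streaming); no versions are pinned in this commit. Before launching the UI with "streamlit run app.py", the minimal sketch below can confirm that the access token and the hosted Llama endpoint work. It is a hypothetical helper, not part of the commit; only the HF_API_KEY variable name and the model id come from app.py itself.

# smoke_test.py -- hypothetical helper, not part of this commit.
# Checks that HF_API_KEY is set and that the Inference API streams chat tokens.
import os
from huggingface_hub import InferenceClient

api_key = os.getenv("HF_API_KEY")
assert api_key, "Set HF_API_KEY to a Hugging Face access token before running app.py"

client = InferenceClient(api_key=api_key)
stream = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    messages=[{"role": "user", "content": "Reply with one short sentence."}],
    max_tokens=20,
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")  # delta.content may be None
print()

If this prints a short reply, the token and endpoint are usable and the Streamlit app should be able to stream song or story text the same way.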