muhtasham committed on
Commit
5ba0e1d
·
1 Parent(s): 6a85296
Files changed (4) hide show
  1. .Dockerfile +16 -0
  2. README copy.md +10 -0
  3. main.py +134 -0
  4. requirements.txt +10 -0
.Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM tiangolo/uvicorn-gunicorn:python3.10-slim

# Set the working directory to /app
WORKDIR /app

# Install dependencies before copying the rest of the sources so this layer
# stays cached until requirements.txt itself changes. The file lives under
# /app (not at the filesystem root), so reference it there.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Copy the current directory contents into the container at /app
COPY . /app

# Expose the port the app runs on
EXPOSE 7860

# Start the FastAPI app on port 7860
CMD ["fastapi", "run", "main.py", "--host", "0.0.0.0", "--port", "7860"]
README copy.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Agent
3
+ emoji: 🌖
4
+ colorFrom: red
5
+ colorTo: gray
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
main.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException, Form
2
+ from fastapi.responses import JSONResponse
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ from PIL import Image
5
+ from openai import AsyncOpenAI
6
+ from pydantic import BaseModel
7
+ from fastapi.logger import logger
8
+
9
+ import io
10
+ import os
11
+ import multion
12
+ import torch
13
+ import instructor
14
+ import openai
15
+
16
+ from multion.client import MultiOn
17
+ from dotenv import load_dotenv
18
+
19
# Read key/value pairs from a .env file into the process environment.
load_dotenv()

# NOTE: this rebinds the name ``multion`` from the imported module to the
# client instance; the request handler below relies on that name.
multion = MultiOn(api_key=os.environ.get("MULTION_API_KEY"))
logger.info("MultiOn API key loaded")

app = FastAPI()

# Pick the accelerator in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    _device_name = "cuda"
elif torch.backends.mps.is_available():
    _device_name = "mps"
else:
    _device_name = "cpu"
device = torch.device(_device_name)
logger.info(f"Device: {device}")

# Model checkpoint and the pinned revision used for both weights and tokenizer.
model_id = "vikhyatk/moondream2"
revision = "2024-05-20"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
)
model = model.to(device)
logger.info(f"Model loaded: {model_id} to {device}")

model = torch.compile(model)
logger.info(f"Model compiled: {model_id} to {device}")

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
logger.info(f"Tokenizer loaded: {model_id}")

# Async OpenAI client wrapped by instructor; the API key comes from the
# OPENAI_API_KEY environment variable.
client = instructor.from_openai(
    AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
)
43
+
44
# NOTE(review): this class is passed as ``response_model`` to the LLM client,
# so its docstring is presumably surfaced in the generated schema. Treat the
# docstring as runtime text and confirm before rewording it.
class MultiOnInputBrowse(BaseModel):
    """
    A model for handling user commands that involve browsing actions.

    Attributes:
        cmd (str): The command to execute. Example: "post 'hello world - I love multion' on twitter".
        url (str): The URL where the action should be performed. Example: "https://twitter.com".
        local (bool): Flag indicating whether the action should be performed locally. Default is True.
    """

    # Instruction forwarded as ``cmd`` to ``multion.browse``.
    cmd: str
    # Target page forwarded as ``url`` to ``multion.browse``.
    url: str
    # Forwarded as ``local`` to ``multion.browse``; defaults to True.
    local: bool = True
56
+
57
async def process_image_file(file: UploadFile) -> str:
    """
    Describe an uploaded image using the vision-language model.

    Args:
        file (UploadFile): The uploaded image file.

    Raises:
        HTTPException: 400 if the declared content type is not JPEG or PNG,
            or if the payload cannot be decoded as an image; 500 if the
            model fails while producing the description.

    Returns:
        str: The description of the image.
    """
    if file.content_type not in ("image/jpeg", "image/png"):
        raise HTTPException(status_code=400, detail="Invalid file type. Only JPEG and PNG are supported.")

    image_data = await file.read()

    # The content type is declared by the client, so the bytes may still not
    # be a valid image. Image.open() only reads the header lazily; load()
    # forces a full decode so a corrupt or truncated upload is reported here
    # as a client error rather than escaping as an unhandled exception.
    try:
        image = Image.open(io.BytesIO(image_data))
        image.load()
    except (OSError, ValueError, Image.DecompressionBombError) as exc:
        raise HTTPException(status_code=400, detail=f"Could not decode image: {exc}") from exc

    try:
        enc_image = model.encode_image(image)
        return model.answer_question(enc_image, "Describe this image.", tokenizer)
    except Exception as exc:
        # Boundary handler: surface any model failure as a 500 with its message.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
82
+
83
@app.post("/process-input/")
async def process_input(text: str = Form(...), file: UploadFile = File(None)):
    """
    Turn user text (plus an optional image) into a MultiOn browse action.

    The image, when supplied, is described by ``process_image_file`` and the
    description is appended to the text. The combined text is converted into
    a structured command by ``generate_command`` and executed through
    ``multion.browse``.

    Args:
        text (str): Required form field with the user's instruction.
        file (UploadFile): Optional uploaded image; ``None`` when omitted.

    Raises:
        HTTPException: 500 with a "MultiOn API error" detail when the browse
            call or response handling fails. HTTPExceptions raised by
            ``process_image_file`` or ``generate_command`` propagate unchanged.

    Returns:
        JSONResponse: ``{"response": <browse message>, "command": <command fields>}``.
    """
    image_description = None
    if file is not None:
        logger.info("Processing image file")
        image_description = await process_image_file(file)
        logger.info(f"Image description: {image_description}")

    # Append the image description to the text only when there is one.
    if image_description:
        processed_text = f"{text} [Image Description: {image_description}]"
    else:
        processed_text = text

    logger.info(f"Processed text: {processed_text}")
    command = await generate_command(processed_text)
    # The command model exposes cmd/url/local only; it has no ``message`` field.
    logger.info(f"Command generated: {command.cmd}")

    try:
        logger.info("Calling MultiOn API")
        # NOTE(review): this call is not awaited; if the client is synchronous
        # it will hold the event loop for the duration of the browse session.
        response = multion.browse(
            cmd=command.cmd,
            url=command.url,
            local=command.local,
        )
        logger.info(f"Response received: {response.message}")
        return JSONResponse(content={"response": response.message, "command": command.model_dump()})
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"MultiOn API error: {exc}") from exc
118
+
119
+
120
async def generate_command(content: str) -> MultiOnInputBrowse:
    """
    Convert free-form text into a structured browse command via the LLM.

    Uses the module-level instructor-wrapped async OpenAI ``client`` so the
    completion is parsed into a ``MultiOnInputBrowse`` instance. The legacy
    ``openai.ChatCompletion`` entry point is neither awaitable nor aware of
    ``response_model``.

    Args:
        content (str): The user text, optionally including an image description.

    Raises:
        HTTPException: 500 with an "OpenAI API error" detail if the request
            or the structured parsing fails.

    Returns:
        MultiOnInputBrowse: The command, URL and local flag chosen by the model.
    """
    try:
        return await client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": content,
                }
            ],
            response_model=MultiOnInputBrowse,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"OpenAI API error: {exc}") from exc
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# The "standard" extra provides the `fastapi` CLI used by the Dockerfile CMD
# and python-multipart, which FastAPI requires for Form/File parameters.
fastapi[standard]
openai
transformers
torch
torchvision
einops
multion
gradio
instructor
python-dotenv
# Imported directly by main.py (`from PIL import Image`).
pillow