Spaces:
Build error
Build error
azharaslam
committed on
Upload 2 files
Browse files- app.py +62 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
# Define the model and processor.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# SECURITY: never commit a real API token to source control. Read it from the
# environment (set HF_TOKEN in the Space / host settings) instead. The token
# that was previously hardcoded here must be treated as leaked and revoked.
API_TOKEN = os.environ.get("HF_TOKEN")

PROCESSOR = AutoProcessor.from_pretrained(
    "HuggingFaceM4/VLM_WebSight_finetuned",
    token=API_TOKEN,
)
MODEL = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceM4/VLM_WebSight_finetuned",
    token=API_TOKEN,
    trust_remote_code=True,
).to(DEVICE)

# Number of <image> placeholder tokens the model expects for a single image.
image_seq_len = MODEL.config.perceiver_config.resampler_n_latents
BOS_TOKEN = PROCESSOR.tokenizer.bos_token
# Token ids the generator must never emit (the image-placeholder tokens).
BAD_WORDS_IDS = PROCESSOR.tokenizer(
    ["<image>", "<fake_token_around_image>"], add_special_tokens=False
).input_ids
# Image preprocessing
def convert_to_rgb(image):
    """Return *image* as RGB, flattening any transparency onto white.

    Images already in RGB mode are returned unchanged; anything else is
    composited over an opaque white background before conversion.
    """
    if image.mode == "RGB":
        return image
    rgba = image.convert("RGBA")
    white = Image.new("RGBA", rgba.size, (255, 255, 255))
    return Image.alpha_composite(white, rgba).convert("RGB")
def custom_transform(x):
    """Convert a PIL image into a normalized (1, 3, 960, 960) float tensor.

    The image is flattened to RGB, resized to the model's expected 960x960
    resolution, scaled to [0, 1], and normalized with the processor's
    per-channel mean/std.
    """
    x = convert_to_rgb(x)
    x = x.resize((960, 960), Image.BILINEAR)
    # BUG FIX: torch.tensor() cannot consume a PIL image directly — convert
    # through numpy first, then move channels to the front (C, H, W).
    x = torch.from_numpy(np.array(x, dtype=np.float32)).permute(2, 0, 1) / 255.0
    # BUG FIX: image_mean / image_std are plain Python lists; lists don't
    # support [:, None, None] indexing. Lift them to tensors so they
    # broadcast over the (C, H, W) image.
    mean = torch.tensor(PROCESSOR.image_processor.image_mean)[:, None, None]
    std = torch.tensor(PROCESSOR.image_processor.image_std)[:, None, None]
    x = (x - mean) / std
    return x.unsqueeze(0)
# Function to generate HTML/CSS code
def generate_code(image):
    """Run the vision-language model on *image* and return generated HTML/CSS."""
    # Prompt is BOS followed by the image-placeholder token span.
    prompt = (
        f"{BOS_TOKEN}<fake_token_around_image>"
        f"{'<image>' * image_seq_len}<fake_token_around_image>"
    )
    encoded = PROCESSOR.tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
    )
    encoded["pixel_values"] = custom_transform(image).to(DEVICE)
    batch = {key: tensor.to(DEVICE) for key, tensor in encoded.items()}
    generated = MODEL.generate(**batch, bad_words_ids=BAD_WORDS_IDS, max_length=4096)
    return PROCESSOR.batch_decode(generated, skip_special_tokens=True)[0]
# Gradio Interface
iface = gr.Interface(
    fn=generate_code,
    # BUG FIX: the gr.inputs.* namespace was deprecated in Gradio 3 and
    # removed in Gradio 4 (the likely cause of the Space's build error);
    # use the top-level component class instead.
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="WebInsight - Generate HTML/CSS from Mockup",
    description="Upload a website component image to generate corresponding HTML/CSS code.",
)

iface.launch()
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
Pillow
|
3 |
+
transformers
|
4 |
+
gradio
|