azharaslam committed on
Commit
6998869
·
verified ·
1 Parent(s): bfe4290

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +62 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
5
+
6
# Model and processor setup.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# SECURITY: never commit an access token to source control. The token is read
# from the HF_TOKEN environment variable (e.g. a Space secret). Any token that
# was previously committed here should be treated as leaked and revoked.
# When HF_TOKEN is unset this is None, which is passed through unchanged to
# from_pretrained (presumably the hub client then uses its own default
# credential lookup -- confirm against the huggingface_hub docs).
API_TOKEN = os.environ.get("HF_TOKEN")

# Single source of truth for the checkpoint used by both loaders below.
MODEL_ID = "HuggingFaceM4/VLM_WebSight_finetuned"

PROCESSOR = AutoProcessor.from_pretrained(
    MODEL_ID,
    token=API_TOKEN,
)
MODEL = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=API_TOKEN,
    trust_remote_code=True,  # the checkpoint ships its own modeling code
).to(DEVICE)

# Number of "<image>" placeholder tokens the prompt must contain per image.
image_seq_len = MODEL.config.perceiver_config.resampler_n_latents
BOS_TOKEN = PROCESSOR.tokenizer.bos_token
# Token ids the model must never emit during generation.
BAD_WORDS_IDS = PROCESSOR.tokenizer(
    ["<image>", "<fake_token_around_image>"],
    add_special_tokens=False,
).input_ids
23
+
24
# Image preprocessing
def convert_to_rgb(image):
    """Return ``image`` in RGB mode.

    An image that is already RGB is returned unchanged (same object).
    Anything else is converted to RGBA, composited over an opaque white
    canvas of the same size, and then converted to RGB.
    """
    if image.mode != "RGB":
        rgba = image.convert("RGBA")
        white_canvas = Image.new("RGBA", rgba.size, (255, 255, 255))
        return Image.alpha_composite(white_canvas, rgba).convert("RGB")
    return image
32
+
33
def custom_transform(x):
    """Convert a PIL image into a normalized model input tensor.

    The image is converted to RGB, resized to 960x960 with bilinear
    resampling, scaled to [0, 1], and normalized per channel with the
    processor's ``image_mean`` / ``image_std``.

    Args:
        x: A PIL image in any mode.

    Returns:
        A float tensor of shape ``(1, 3, 960, 960)``.
    """
    # Imported locally so this function does not rely on a module-level
    # numpy import being present.
    import numpy as np

    x = convert_to_rgb(x)
    x = x.resize((960, 960), Image.BILINEAR)

    # torch.tensor() cannot reliably consume a PIL image directly; go through
    # a numpy (H, W, C) uint8 array, then move channels first and rescale.
    pixels = torch.from_numpy(np.array(x)).permute(2, 0, 1) / 255.0

    # image_mean / image_std may be plain Python lists, which do not support
    # [:, None, None] indexing; build explicit (C, 1, 1) tensors instead.
    image_processor = PROCESSOR.image_processor
    mean = torch.as_tensor(image_processor.image_mean, dtype=pixels.dtype).view(-1, 1, 1)
    std = torch.as_tensor(image_processor.image_std, dtype=pixels.dtype).view(-1, 1, 1)

    return ((pixels - mean) / std).unsqueeze(0)
39
+
40
# Function to generate HTML/CSS code
def generate_code(image):
    """Generate HTML/CSS text for a screenshot or mockup image.

    Args:
        image: A PIL image of the web component to reproduce.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.

    Raises:
        ValueError: If ``image`` is None (e.g. the form was submitted
            without uploading a file).
    """
    if image is None:
        raise ValueError("No image provided; upload a mockup image first.")

    # Prompt layout: BOS, then image_seq_len "<image>" placeholders wrapped
    # in "<fake_token_around_image>" markers.
    prompt = f"{BOS_TOKEN}<fake_token_around_image>{'<image>' * image_seq_len}<fake_token_around_image>"
    inputs = PROCESSOR.tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
    )

    pixel_values = custom_transform(image)
    # NOTE(review): the checkpoint's remote modeling code appears to expect
    # pixel_values shaped (batch, num_images, C, H, W); promote a 4-D
    # (1, C, H, W) tensor accordingly -- confirm against the model card.
    if pixel_values.dim() == 4:
        pixel_values = pixel_values.unsqueeze(0)
    inputs["pixel_values"] = pixel_values

    # Move every input tensor to the model's device in one place.
    inputs = {key: value.to(DEVICE) for key, value in inputs.items()}

    generated_ids = MODEL.generate(**inputs, bad_words_ids=BAD_WORDS_IDS, max_length=4096)
    return PROCESSOR.batch_decode(generated_ids, skip_special_tokens=True)[0]
52
+
53
# Gradio interface
iface = gr.Interface(
    fn=generate_code,
    # The legacy gr.inputs namespace was removed in Gradio 4; gr.Image is the
    # component to use, and type="pil" still hands the callback a PIL image.
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="WebInsight - Generate HTML/CSS from Mockup",
    description="Upload a website component image to generate corresponding HTML/CSS code.",
)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ Pillow
3
+ transformers
4
+ gradio