Spaces:

THUDM-HF-SPACE
/

CogAgent-Demo

Running on Zero

App Files Files Community

zR committed on Dec 23, 2024

Commit

d944583

1 Parent(s): 3deb6e9

fx

Browse files

Files changed (1) hide show

app.py +8 -6

app.py CHANGED Viewed

@@ -82,7 +82,7 @@ def predict(history, max_length, img_path, platform_str, format_str, output_dir)
     prev_len = len(history)
     query, image = preprocess_messages(history, img_path, platform_str, format_str)
-    model_inputs = tokenizer.apply_chat_template(
         [{"role": "user", "image": image, "content": query}],
         add_generation_prompt=True,
         tokenize=True,
@@ -94,12 +94,13 @@ def predict(history, max_length, img_path, platform_str, format_str, output_dir)
         tokenizer, timeout=60, skip_prompt=True, skip_special_tokens=True
     )
     generate_kwargs = {
-        "input_ids": model_inputs["input_ids"].to(model.device),
-        "attention_mask": model_inputs["attention_mask"].to(model.device),
         "streamer": streamer,
         "max_length": max_length,
-        "do_sample": False,
-        "top_p": 0.0,
         "top_k": 1,
     }
     t = Thread(target=model.generate, kwargs=generate_kwargs)
@@ -187,7 +188,8 @@ def main():
         gr.HTML("<h1 align='center'>CogAgent-9B-20241220 Demo</h1>")
         gr.HTML(
             "<p align='center' style='color:red;'>This demo is for learning and communication purposes only. Users must assume responsibility for the risks associated with AI-generated planning and operations.</p>"
-            "<p align='left' style='color:black;'>1. Upload an image. 2. Provide your instructions to CogAgent. 3. Wait for CogAgent to return specific operations, and if there are bounding boxes (Bbox), they will be displayed in the image area on the right.</p>"
         )
         with gr.Row():
             img_path = gr.Image(label="Upload a Screenshot", type="filepath", height=400)

     prev_len = len(history)
     query, image = preprocess_messages(history, img_path, platform_str, format_str)
+    inputs = tokenizer.apply_chat_template(
         [{"role": "user", "image": image, "content": query}],
         add_generation_prompt=True,
         tokenize=True,
         tokenizer, timeout=60, skip_prompt=True, skip_special_tokens=True
     )
     generate_kwargs = {
+        "input_ids": inputs["input_ids"],
+        "attention_mask": inputs["attention_mask"],
+        "position_ids": inputs["position_ids"],
+        "images": inputs["images"],
         "streamer": streamer,
         "max_length": max_length,
+        "do_sample": True,
         "top_k": 1,
     }
     t = Thread(target=model.generate, kwargs=generate_kwargs)
         gr.HTML("<h1 align='center'>CogAgent-9B-20241220 Demo</h1>")
         gr.HTML(
             "<p align='center' style='color:red;'>This demo is for learning and communication purposes only. Users must assume responsibility for the risks associated with AI-generated planning and operations.</p>"
+            "<p align='center' style='color:red;'>In this demo, the model assumes that the user is using a Mac operating system, so it is recommended to upload screenshots from a Mac operating system.</p>"
+            "<p align='left' style='color:black;'>1. Upload an image.<br>2. Provide your instructions to CogAgent.<br>3. Wait for CogAgent to return specific operations. If there are bounding boxes (Bbox), they will be displayed in the image area on the right.</p>"
         )
         with gr.Row():
             img_path = gr.Image(label="Upload a Screenshot", type="filepath", height=400)