BoboiAzumi committed on
Commit
3e62834
1 Parent(s): a744fab
Files changed (2) hide show
  1. app.py +0 -2
  2. process.py +7 -7
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import gradio as gr
2
- import spaces
3
 
4
  from process import inference
5
 
6
- @spaces.GPU
7
  def clickit(video, prompt):
8
  return inference(
9
  video,
 
1
  import gradio as gr
 
2
 
3
  from process import inference
4
 
 
5
  def clickit(video, prompt):
6
  return inference(
7
  video,
process.py CHANGED
@@ -1,4 +1,5 @@
1
  import io
 
2
 
3
  import argparse
4
  import numpy as np
@@ -6,6 +7,7 @@ import torch
6
  from decord import cpu, VideoReader, bridge
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
 
 
9
  MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
10
 
11
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -60,12 +62,8 @@ model = AutoModelForCausalLM.from_pretrained(
60
  trust_remote_code=True
61
  ).eval().to(DEVICE)
62
 
63
-
64
- def predict(prompt, video_data, temperature):
65
- strategy = 'chat'
66
-
67
- video = load_video(video_data, strategy=strategy)
68
-
69
  history = []
70
  query = prompt
71
  inputs = model.build_conversation_input_ids(
@@ -98,6 +96,8 @@ def predict(prompt, video_data, temperature):
98
 
99
  def inference(video, prompt):
100
  temperature = 0.1
101
- video_data = open(video, 'rb').read()
 
 
102
  response = predict(prompt, video_data, temperature)
103
  return response
 
1
  import io
2
+ import spaces
3
 
4
  import argparse
5
  import numpy as np
 
7
  from decord import cpu, VideoReader, bridge
8
  from transformers import AutoModelForCausalLM, AutoTokenizer
9
 
10
+
11
  MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
12
 
13
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 
62
  trust_remote_code=True
63
  ).eval().to(DEVICE)
64
 
65
+ @spaces.GPU
66
+ def predict(prompt, video, temperature):
 
 
 
 
67
  history = []
68
  query = prompt
69
  inputs = model.build_conversation_input_ids(
 
96
 
97
  def inference(video, prompt):
98
  temperature = 0.1
99
+ video = open(video, 'rb').read()
100
+ strategy = 'chat'
101
+ video_data = load_video(video, strategy=strategy)
102
  response = predict(prompt, video_data, temperature)
103
  return response