tanthinhdt committed (verified)
Commit 5fee850 · Parent(s): 4703605

fix(app): adjust some params

Files changed (1)
app.py: +48 -69
app.py CHANGED
@@ -1,4 +1,3 @@
-import yaml
 import gradio as gr
 from mediapipe.python.solutions import holistic
 from torchvision.transforms.v2 import Compose, Lambda, Normalize
@@ -62,17 +61,38 @@ examples = [
 ]
 
 
+device = 'cpu'
+model_name = 'VieSignLang/videomae_skeleton_v1.0'
+image_processor = VideoMAEImageProcessor.from_pretrained(model_name)
+model = VideoMAEForVideoClassification.from_pretrained(model_name)
+model = model.eval().to(device)
+
+mean = image_processor.image_mean
+std = image_processor.image_std
+if 'shortest_edge' in image_processor.size:
+    model_input_height = model_input_width = image_processor.size['shortest_edge']
+else:
+    model_input_height = image_processor.size['height']
+    model_input_width = image_processor.size['width']
+
+keypoints_detector = holistic.Holistic(
+    static_image_mode=False,
+    model_complexity=2,
+    enable_segmentation=True,
+    refine_face_landmarks=True,
+)
+
+transform = Compose(
+    [
+        Lambda(lambda x: x / 255.0),
+        Normalize(mean=mean, std=std),
+    ]
+)
+
+
 def inference(
     video: str,
     k: int,
-    model,
-    keypoints_detector,
-    data_height: int,
-    data_width: int,
-    model_input_height: int,
-    model_input_width: int,
-    device: str,
-    transform: Compose,
     progress: gr.Progress = gr.Progress(),
 ) -> tuple:
     progress(0, desc='Preprocessing video')
@@ -80,8 +100,6 @@ def inference(
         model_num_frames=model.config.num_frames,
         keypoints_detector=keypoints_detector,
         source=video,
-        data_height=data_height,
-        data_width=data_width,
         model_input_height=model_input_height,
         model_input_width=model_input_width,
        device=device,
@@ -100,61 +118,22 @@ def inference(
     return output_message
 
 
-if __name__ == '__main__':
-    with open('config.yaml', 'r') as file:
-        config = yaml.safe_load(file)
-
-    device = 'cpu'
-    image_processor = VideoMAEImageProcessor.from_pretrained(config['model']['name'])
-    model = VideoMAEForVideoClassification.from_pretrained(config['model']['name'])
-    model = model.eval().to(device)
-
-    mean = image_processor.image_mean
-    std = image_processor.image_std
-    if 'shortest_edge' in image_processor.size:
-        height = width = image_processor.size['shortest_edge']
-    else:
-        height = image_processor.size['height']
-        width = image_processor.size['width']
-
-    keypoints_detector = holistic.Holistic(
-        static_image_mode=False,
-        model_complexity=2,
-        enable_segmentation=True,
-        refine_face_landmarks=True,
-    )
-
-    transform = Compose(
-        [
-            Lambda(lambda x: x / 255.0),
-            Normalize(mean=mean, std=std),
-        ]
-    )
-
-    iface = gr.Interface(
-        fn=inference,
-        inputs=[
-            'video',
-            gr.components.Slider(
-                minimum=1,
-                maximum=5,
-                value=3,
-                step=1,
-                label='k',
-                info='Return top-k results',
-            ),
-            model,
-            keypoints_detector,
-            config['data']['height'],
-            config['data']['width'],
-            height,
-            width,
-            device,
-            transform,
-        ],
-        outputs='text',
-        examples=examples,
-        title=title,
-        description=description,
-    )
-    iface.launch()
+iface = gr.Interface(
+    fn=inference,
+    inputs=[
+        'video',
+        gr.components.Slider(
+            minimum=1,
+            maximum=5,
+            value=3,
+            step=1,
+            label='k',
+            info='Return top-k results',
+        ),
+    ],
+    outputs='text',
+    examples=examples,
+    title=title,
+    description=description,
+)
+iface.launch()
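
For reference, a minimal standalone sketch of the pattern this commit moves to: the checkpoint and model input size are resolved once at module scope (no longer behind an if __name__ == '__main__': guard), and the Gradio callback takes only the user-facing inputs. Only the model id, the input-size lookup, and the Slider settings below come from the diff; the names predict, demo, input_height/input_width, the dummy clip, and the top-k formatting are illustrative assumptions, and the real preprocessing (MediaPipe Holistic keypoints plus the torchvision transform) is not reproduced here.

import gradio as gr
import torch
from transformers import VideoMAEForVideoClassification, VideoMAEImageProcessor

model_name = 'VieSignLang/videomae_skeleton_v1.0'  # checkpoint named in the diff
device = 'cpu'

# Load the processor and model once at import time, mirroring the refactor above.
image_processor = VideoMAEImageProcessor.from_pretrained(model_name)
model = VideoMAEForVideoClassification.from_pretrained(model_name).eval().to(device)

# Same input-size resolution as the diff: the processor exposes either a single
# 'shortest_edge' or explicit 'height'/'width' entries.
if 'shortest_edge' in image_processor.size:
    input_height = input_width = image_processor.size['shortest_edge']
else:
    input_height = image_processor.size['height']
    input_width = image_processor.size['width']


def predict(video: str, k: int) -> str:
    # Placeholder body (assumption): app.py's real pipeline extracts keypoints
    # and normalizes the uploaded video; a dummy clip keeps this sketch runnable.
    pixel_values = torch.zeros(1, model.config.num_frames, 3, input_height, input_width)
    with torch.no_grad():
        logits = model(pixel_values=pixel_values).logits
    probs = logits.softmax(dim=-1)[0]
    top = probs.topk(int(k))
    return '\n'.join(
        f'{model.config.id2label[i]}: {p:.2%}'
        for i, p in zip(top.indices.tolist(), top.values.tolist())
    )


demo = gr.Interface(
    fn=predict,
    inputs=['video', gr.Slider(minimum=1, maximum=5, value=3, step=1, label='k')],
    outputs='text',
)
demo.launch()

With this layout the model and keypoint detector are created once when the module loads, instead of being passed through the gr.Interface inputs list as in the previous version of app.py.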