qubvel-hf (HF Staff) committed
Commit 9f2cbff · 1 parent: 455454b

Fix video writing

Files changed (2)
  1. app.py +17 -24
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,15 +1,15 @@
 import os
 import cv2
 import tqdm
-import shutil
-import tempfile
+import uuid
 import logging
-import supervision as sv
-import torch
 
+import torch
 import spaces
-import gradio as gr
 import numpy as np
+import gradio as gr
+import imageio.v3 as iio
+import supervision as sv
 
 from pathlib import Path
 from functools import lru_cache
@@ -151,13 +151,18 @@ def process_image(
 
 def get_target_size(image_height, image_width, max_size: int):
     if image_height < max_size and image_width < max_size:
-        return image_width, image_height
-    if image_height > image_width:
+        new_height, new_width = image_width, image_height
+    elif image_height > image_width:
         new_height = max_size
         new_width = int(image_width * max_size / image_height)
     else:
         new_width = max_size
         new_height = int(image_height * max_size / image_width)
+
+    # make even (for video codec compatibility)
+    new_height = new_height // 2 * 2
+    new_width = new_width // 2 * 2
+
     return new_width, new_height
 
 
@@ -201,11 +206,6 @@ def process_video(
     n_frames_to_read = min(MAX_NUM_FRAMES, video_info.total_frames // read_each_i_frame)
     frames = read_video_k_frames(video_path, n_frames_to_read, read_each_i_frame)
 
-    # Use H.264 codec for browser compatibility
-    fourcc = cv2.VideoWriter_fourcc(*"H264")
-    temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    writer = cv2.VideoWriter(temp_file.name, fourcc, target_fps, (target_width, target_height))
-
     box_annotator = sv.BoxAnnotator(thickness=1)
     label_annotator = sv.LabelAnnotator(text_scale=0.5)
 
@@ -216,25 +216,18 @@
         target_size=(target_height, target_width),
     )
 
+    annotated_frames = []
     for frame, result in tqdm.tqdm(zip(frames, results), desc="Annotating frames", total=len(frames)):
         frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
         detections = sv.Detections.from_transformers(result, id2label=id2label)
         detections = detections.with_nms(threshold=0.95, class_agnostic=True)
         annotated_frame = box_annotator.annotate(scene=frame, detections=detections)
         annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections)
-        writer.write(cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))
-
-    writer.release()
-    temp_file.close()
-
-    # Copy to persistent directory for Gradio access
-    output_filename = f"output_{os.path.basename(temp_file.name)}"
-    output_path = VIDEO_OUTPUT_DIR / output_filename
-    shutil.copy(temp_file.name, output_path)
-    os.unlink(temp_file.name) # Remove temporary file
-    logger.info(f"Video saved to {output_path}")
+        annotated_frames.append(annotated_frame)
 
-    return str(output_path)
+    output_filename = os.path.join(VIDEO_OUTPUT_DIR, f"output_{uuid.uuid4()}.mp4")
+    iio.imwrite(output_filename, annotated_frames, fps=target_fps, codec="h264") #, pixelformat="yuv420p")
+    return output_filename
 
 
 
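The reworked get_target_size now rounds both dimensions down to the nearest even number before returning them: H.264 output in the common yuv420p pixel format subsamples chroma in 2x2 blocks, so an odd width or height typically makes the encoder fail or produce a file that browsers refuse to play. A minimal, self-contained sketch of this kind of sizing helper (the function name and example numbers are illustrative, not taken from the repo):

def fit_to_max_size(height: int, width: int, max_size: int) -> tuple[int, int]:
    # Scale so the longer side is at most max_size, keeping the aspect ratio.
    if height < max_size and width < max_size:
        new_height, new_width = height, width
    elif height > width:
        new_height = max_size
        new_width = int(width * max_size / height)
    else:
        new_width = max_size
        new_height = int(height * max_size / width)
    # Round down to even values so h264 + yuv420p encoders accept the frame size.
    return new_width // 2 * 2, new_height // 2 * 2

print(fit_to_max_size(1280, 542, 640))  # (270, 640): the computed width 271 is rounded down to 270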
 
requirements.txt CHANGED
@@ -7,4 +7,5 @@ ffmpeg-python
 tqdm
 pillow
 supervision
-spaces
+spaces
+imageio[pyav]
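With the writer switched from cv2.VideoWriter to imageio, the new imageio[pyav] extra pulls in the av bindings that imageio's pyav plugin uses to encode H.264. A minimal sketch of the new write path, assuming imageio[pyav] is installed; the file name and synthetic frames below are made up for illustration:

import numpy as np
import imageio.v3 as iio

# 60 RGB uint8 frames with even dimensions (360x640), as h264/yuv420p expects.
frames = [np.random.randint(0, 256, (360, 640, 3), dtype=np.uint8) for _ in range(60)]

# One call writes the whole clip; fps sets the playback rate, codec picks the encoder.
iio.imwrite("demo.mp4", frames, fps=24, codec="h264")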