Fix video writing
- app.py +17 -24
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,15 +1,15 @@
 import os
 import cv2
 import tqdm
-import shutil
-import tempfile
+import uuid
 import logging
-import supervision as sv
-import torch
 
+import torch
 import spaces
-import gradio as gr
 import numpy as np
+import gradio as gr
+import imageio.v3 as iio
+import supervision as sv
 
 from pathlib import Path
 from functools import lru_cache
@@ -151,13 +151,18 @@ def process_image(
 
 def get_target_size(image_height, image_width, max_size: int):
     if image_height < max_size and image_width < max_size:
-        return image_width, image_height
-    if image_height > image_width:
+        new_height, new_width = image_height, image_width
+    elif image_height > image_width:
         new_height = max_size
         new_width = int(image_width * max_size / image_height)
     else:
         new_width = max_size
         new_height = int(image_height * max_size / image_width)
+
+    # make even (for video codec compatibility)
+    new_height = new_height // 2 * 2
+    new_width = new_width // 2 * 2
+
     return new_width, new_height
 
 
@@ -201,11 +206,6 @@ def process_video(
     n_frames_to_read = min(MAX_NUM_FRAMES, video_info.total_frames // read_each_i_frame)
     frames = read_video_k_frames(video_path, n_frames_to_read, read_each_i_frame)
 
-    # Use H.264 codec for browser compatibility
-    fourcc = cv2.VideoWriter_fourcc(*"H264")
-    temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    writer = cv2.VideoWriter(temp_file.name, fourcc, target_fps, (target_width, target_height))
-
     box_annotator = sv.BoxAnnotator(thickness=1)
     label_annotator = sv.LabelAnnotator(text_scale=0.5)
 
@@ -216,25 +216,18 @@ def process_video(
         target_size=(target_height, target_width),
     )
 
+    annotated_frames = []
     for frame, result in tqdm.tqdm(zip(frames, results), desc="Annotating frames", total=len(frames)):
         frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
         detections = sv.Detections.from_transformers(result, id2label=id2label)
         detections = detections.with_nms(threshold=0.95, class_agnostic=True)
         annotated_frame = box_annotator.annotate(scene=frame, detections=detections)
         annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections)
-        writer.write(annotated_frame)
-
-    writer.release()
-    temp_file.close()
-
-    # Copy to persistent directory for Gradio access
-    output_filename = f"output_{os.path.basename(temp_file.name)}"
-    output_path = VIDEO_OUTPUT_DIR / output_filename
-    shutil.copy(temp_file.name, output_path)
-    os.unlink(temp_file.name)  # Remove temporary file
-    logger.info(f"Video saved to {output_path}")
+        annotated_frames.append(annotated_frame)
 
-    return str(output_path)
+    output_filename = os.path.join(VIDEO_OUTPUT_DIR, f"output_{uuid.uuid4()}.mp4")
+    iio.imwrite(output_filename, annotated_frames, fps=target_fps, codec="h264")  # , pixelformat="yuv420p")
+    return output_filename
 
 
 
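Net effect in app.py: instead of streaming frames through cv2.VideoWriter into a temp file and copying the result into VIDEO_OUTPUT_DIR, the annotated frames are now collected in a list and written in one call with imageio.v3, which hands H.264 encoding to an FFmpeg-backed plugin. Below is a minimal standalone sketch of that write path, assuming imageio[pyav] is installed; the gray test frames, the dimensions, and the 15 fps value are illustrative, not from the commit.

# Standalone sketch of the new write path (assumes `pip install "imageio[pyav]"`).
import uuid
import numpy as np
import imageio.v3 as iio

# H.264 with the usual yuv420p pixel format subsamples chroma 2x2, so both
# dimensions must be even -- the same rounding get_target_size now applies.
height, width = 361 // 2 * 2, 641 // 2 * 2  # -> 360, 640
frames = [np.full((height, width, 3), 128, dtype=np.uint8) for _ in range(30)]

# Note: imageio expects RGB channel order (frames decoded with OpenCV are BGR).
output_filename = f"output_{uuid.uuid4()}.mp4"
iio.imwrite(output_filename, frames, fps=15, codec="h264")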
requirements.txt
CHANGED
@@ -7,4 +7,5 @@ ffmpeg-python
 tqdm
 pillow
 supervision
-spaces
+spaces
+imageio[pyav]
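The new imageio[pyav] requirement installs PyAV, the FFmpeg-binding backend that imageio.v3 uses here to encode the .mp4 with codec="h264". A quick smoke test of the installed backend, with an arbitrary output name:

# Verify the pyav-backed mp4 writer after installing requirements.
import numpy as np
import imageio.v3 as iio

iio.imwrite("smoke_test.mp4", [np.zeros((64, 64, 3), dtype=np.uint8)] * 5,
            fps=5, codec="h264")
print("h264 mp4 write OK")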