Spaces:
Runtime error
Runtime error
Update app.py
#143
by
kafarasi
- opened
app.py
CHANGED
@@ -5,51 +5,80 @@ from __future__ import annotations
|
|
5 |
import os
|
6 |
import random
|
7 |
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
import gradio as gr
|
10 |
-
import imageio
|
11 |
-
import numpy as np
|
12 |
-
import torch
|
13 |
-
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
|
14 |
-
|
15 |
-
DESCRIPTION = '# [ModelScope Text to Video Synthesis](https://modelscope.cn/models/damo/text-to-video-synthesis/summary)'
|
16 |
-
DESCRIPTION += '\n<p>For Colab usage, you can view <a href="https://colab.research.google.com/drive/1uW1ZqswkQ9Z9bp5Nbo5z59cAn7I0hE6R?usp=sharing" style="text-decoration: underline;" target="_blank">this webpage</a>.(the latest update on 2023.03.21)</p>'
|
17 |
-
DESCRIPTION += '\n<p>This model can only be used for non-commercial purposes. To learn more about the model, take a look at the <a href="https://huggingface.co/damo-vilab/modelscope-damo-text-to-video-synthesis" style="text-decoration: underline;" target="_blank">model card</a>.</p>'
|
18 |
if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
|
19 |
-
DESCRIPTION += f'\n<p>For faster inference
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
pipe
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
def to_video(frames: list[np.ndarray], fps: int) -> str:
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
writer.
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
generator = torch.Generator().manual_seed(seed)
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
examples = [
|
55 |
['An astronaut riding a horse.', 0, 16, 25],
|
@@ -59,6 +88,7 @@ examples = [
|
|
59 |
|
60 |
with gr.Blocks(css='style.css') as demo:
|
61 |
gr.Markdown(DESCRIPTION)
|
|
|
62 |
with gr.Group():
|
63 |
with gr.Box():
|
64 |
with gr.Row(elem_id='prompt-container').style(equal_height=True):
|
@@ -67,10 +97,12 @@ with gr.Blocks(css='style.css') as demo:
|
|
67 |
show_label=False,
|
68 |
max_lines=1,
|
69 |
placeholder='Enter your prompt',
|
70 |
-
elem_id='prompt-text-input'
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
|
|
74 |
with gr.Accordion('Advanced options', open=False):
|
75 |
seed = gr.Slider(
|
76 |
label='Seed',
|
@@ -78,63 +110,37 @@ with gr.Blocks(css='style.css') as demo:
|
|
78 |
maximum=1000000,
|
79 |
step=1,
|
80 |
value=-1,
|
81 |
-
info='
|
|
|
82 |
num_frames = gr.Slider(
|
83 |
label='Number of frames',
|
84 |
minimum=16,
|
85 |
maximum=MAX_NUM_FRAMES,
|
86 |
step=1,
|
87 |
-
value=
|
88 |
-
info=
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
)
|
91 |
-
num_inference_steps = gr.Slider(label='Number of inference steps',
|
92 |
-
minimum=10,
|
93 |
-
maximum=50,
|
94 |
-
step=1,
|
95 |
-
value=25)
|
96 |
|
97 |
-
inputs = [
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
fn=generate,
|
107 |
-
cache_examples=os.getenv('SYSTEM') == 'spaces')
|
108 |
|
109 |
prompt.submit(fn=generate, inputs=inputs, outputs=result)
|
110 |
run_button.click(fn=generate, inputs=inputs, outputs=result)
|
111 |
-
|
112 |
-
|
113 |
-
with gr.Accordion(label='We are hiring(Based in Beijing / Hangzhou, China.)', open=False):
|
114 |
-
gr.HTML("""<div class="acknowledgments">
|
115 |
-
<p>
|
116 |
-
If you're looking for an exciting challenge and the opportunity to work with cutting-edge technologies in AIGC and large-scale pretraining, then we are the place for you. We are looking for talented, motivated and creative individuals to join our team. If you are interested, please send your CV to us.
|
117 |
-
</p>
|
118 |
-
<p>
|
119 |
-
<b>EMAIL: [email protected]</b>.
|
120 |
-
</p>
|
121 |
-
</div>
|
122 |
-
""")
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
<p>
|
128 |
-
Despite how impressive being able to turn text into video is, beware to the fact that this model may output content that reinforces or exacerbates societal biases. The training data includes LAION5B, ImageNet, Webvid and other public datasets. The model was not trained to realistically represent people or events, so using it to generate such content is beyond the model's capabilities.
|
129 |
-
</p>
|
130 |
-
<p>
|
131 |
-
It is not intended to generate content that is demeaning or harmful to people or their environment, culture, religion, etc. Similarly, it is not allowed to generate pornographic, violent and bloody content generation. <b>The model is meant for research purposes</b>.
|
132 |
-
</p>
|
133 |
-
<p>
|
134 |
-
To learn more about the model, head to its <a href="https://huggingface.co/damo-vilab/modelscope-damo-text-to-video-synthesis" style="text-decoration: underline;" target="_blank">model card</a>.
|
135 |
-
</p>
|
136 |
-
</div>
|
137 |
-
""")
|
138 |
-
|
139 |
-
|
140 |
-
demo.queue(api_open=False, max_size=15).launch()
|
|
|
5 |
import os
|
6 |
import random
|
7 |
import tempfile
|
8 |
+
import sys
|
9 |
+
|
10 |
+
# Check critical dependencies before proceeding
|
11 |
+
try:
|
12 |
+
import numpy as np
|
13 |
+
import torch
|
14 |
+
import gradio as gr
|
15 |
+
import imageio
|
16 |
+
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
|
17 |
+
except ImportError as e:
|
18 |
+
print(f"Error: Missing required dependency - {e}")
|
19 |
+
print("Please ensure requirements.txt includes: numpy, torch, diffusers, gradio, imageio")
|
20 |
+
sys.exit(1)
|
21 |
+
|
22 |
+
DESCRIPTION = '''# [ModelScope Text to Video Synthesis](https://modelscope.cn/models/damo/text-to-video-synthesis/summary)
|
23 |
+
<p>For Colab usage, you can view <a href="https://colab.research.google.com/drive/1uW1ZqswkQ9Z9bp5Nbo5z59cAn7I0hE6R?usp=sharing" style="text-decoration: underline;" target="_blank">this webpage</a>.</p>
|
24 |
+
<p>This model can only be used for non-commercial purposes. See the <a href="https://huggingface.co/damo-vilab/modelscope-damo-text-to-video-synthesis" style="text-decoration: underline;" target="_blank">model card</a>.</p>'''
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
|
27 |
+
DESCRIPTION += f'''\n<p>For faster inference, you may duplicate this space and upgrade to GPU.
|
28 |
+
<a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true">
|
29 |
+
<img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></p>'''
|
30 |
+
|
31 |
+
MAX_NUM_FRAMES = int(os.getenv('MAX_NUM_FRAMES', '64')) # Reduced from 200 for stability
|
32 |
+
DEFAULT_NUM_FRAMES = min(MAX_NUM_FRAMES, 16)
|
33 |
+
|
34 |
+
# Initialize pipeline with error handling
|
35 |
+
try:
    # Load the text-to-video pipeline in half precision using the fp16 weights.
    pipe = DiffusionPipeline.from_pretrained(
        'damo-vilab/text-to-video-ms-1.7b',
        torch_dtype=torch.float16,
        variant='fp16',
    )
    # Replace the default scheduler with DPM-Solver multistep, built from the
    # existing scheduler's config.
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_slicing()
except Exception as e:
    # Top-level boundary: the app cannot run without the pipeline, so exit.
    # Emit the full traceback on stderr first; the hint below is only a guess
    # at the cause and the real error must stay visible in the logs.
    import traceback

    traceback.print_exc()
    print(f"Failed to initialize pipeline: {e}", file=sys.stderr)
    print("This model requires significant GPU memory. Try a smaller model like 'cerspense/zeroscope_v2_576w' if needed.", file=sys.stderr)
    sys.exit(1)
|
48 |
|
49 |
def to_video(frames: list[np.ndarray], fps: int) -> str:
    """Encode ``frames`` into a temporary MP4 file and return its path.

    The file is created with ``delete=False``, so it is not removed when this
    function returns.

    Args:
        frames: Frames appended to the video writer in the given order.
        fps: Frame rate passed to the imageio FFMPEG writer.

    Returns:
        Path of the written ``.mp4`` file.

    Raises:
        Exception: Any failure while creating or writing the file is printed
            and then re-raised unchanged.
    """
    try:
        # Only the path is needed: close the handle immediately so no file
        # descriptor stays open for the lifetime of the process.
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as out_file:
            out_path = out_file.name
        # The context manager closes the writer even if appending a frame fails.
        with imageio.get_writer(out_path, format='FFMPEG', fps=fps) as writer:
            for frame in frames:
                writer.append_data(frame)
        return out_path
    except Exception as e:
        print(f"Video creation failed: {e}")
        raise
|
61 |
+
|
62 |
+
def generate(prompt: str, seed: int, num_frames: int, num_inference_steps: int) -> str:
    """Generate a video from a text prompt and return the video file path.

    Args:
        prompt: Text prompt; must contain at least one non-whitespace character.
        seed: Seed for the torch generator; ``-1`` picks a random seed in
            ``[0, 1000000]``.
        num_frames: Number of frames requested from the pipeline.
        num_inference_steps: Number of inference steps passed to the pipeline.

    Returns:
        The path returned by ``to_video`` for the generated frames (8 fps).

    Raises:
        gr.Error: If the prompt is empty, CUDA runs out of memory, or the
            pipeline / video encoding fails for any other reason.
    """
    # Guard against None as well as empty / whitespace-only prompts.
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a valid prompt")

    # NOTE(review): values coming from the UI may arrive as floats depending on
    # the Gradio version - confirm. manual_seed() needs an int, so coerce here.
    seed = int(seed)
    num_frames = int(num_frames)
    num_inference_steps = int(num_inference_steps)

    if seed == -1:
        seed = random.randint(0, 1000000)
    generator = torch.Generator().manual_seed(seed)

    try:
        frames = pipe(
            prompt,
            num_inference_steps=num_inference_steps,
            num_frames=num_frames,
            generator=generator,
        ).frames
        return to_video(frames, 8)
    except torch.cuda.OutOfMemoryError as e:
        # Chain the cause so the original traceback stays in the server logs.
        raise gr.Error("Out of GPU memory - Try reducing frame count or use a smaller model") from e
    except Exception as e:
        raise gr.Error(f"Generation failed: {e}") from e
|
82 |
|
83 |
examples = [
|
84 |
['An astronaut riding a horse.', 0, 16, 25],
|
|
|
88 |
|
89 |
with gr.Blocks(css='style.css') as demo:
|
90 |
gr.Markdown(DESCRIPTION)
|
91 |
+
|
92 |
with gr.Group():
|
93 |
with gr.Box():
|
94 |
with gr.Row(elem_id='prompt-container').style(equal_height=True):
|
|
|
97 |
show_label=False,
|
98 |
max_lines=1,
|
99 |
placeholder='Enter your prompt',
|
100 |
+
elem_id='prompt-text-input'
|
101 |
+
)
|
102 |
+
run_button = gr.Button('Generate video')
|
103 |
+
|
104 |
+
result = gr.Video(label='Result', show_label=False)
|
105 |
+
|
106 |
with gr.Accordion('Advanced options', open=False):
|
107 |
seed = gr.Slider(
|
108 |
label='Seed',
|
|
|
110 |
maximum=1000000,
|
111 |
step=1,
|
112 |
value=-1,
|
113 |
+
info='-1 = random seed each time'
|
114 |
+
)
|
115 |
num_frames = gr.Slider(
|
116 |
label='Number of frames',
|
117 |
minimum=16,
|
118 |
maximum=MAX_NUM_FRAMES,
|
119 |
step=1,
|
120 |
+
value=DEFAULT_NUM_FRAMES,
|
121 |
+
info='Higher values require more GPU memory'
|
122 |
+
)
|
123 |
+
num_inference_steps = gr.Slider(
|
124 |
+
label='Inference steps',
|
125 |
+
minimum=10,
|
126 |
+
maximum=50,
|
127 |
+
step=1,
|
128 |
+
value=25
|
129 |
)
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
+
inputs = [prompt, seed, num_frames, num_inference_steps]
|
132 |
+
|
133 |
+
gr.Examples(
|
134 |
+
examples=examples,
|
135 |
+
inputs=inputs,
|
136 |
+
outputs=result,
|
137 |
+
fn=generate,
|
138 |
+
cache_examples=os.getenv('SYSTEM') == 'spaces'
|
139 |
+
)
|
|
|
|
|
140 |
|
141 |
prompt.submit(fn=generate, inputs=inputs, outputs=result)
|
142 |
run_button.click(fn=generate, inputs=inputs, outputs=result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
|
144 |
+
# NOTE: the "We are hiring" and model-limitations accordions from the previous revision are omitted in this revision.
|
145 |
+
|
146 |
+
demo.queue(max_size=10).launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|