Update inferencer.py
inferencer.py  CHANGED  (+10 -10)
@@ -228,8 +228,8 @@ class InterleaveInferencer:
         image_shapes=(1024, 1024), # Default, can be overridden by actual input image
     ):
         gen_context = self.init_gen_context()
-        cfg_text_context =
-        cfg_img_context =
+        cfg_text_context = deepcopy(gen_context)
+        cfg_img_context = deepcopy(gen_context)
 
         current_image_shapes = image_shapes
 
@@ -243,15 +243,16 @@
 
         for input_term in input_lists:
             if isinstance(input_term, str):
+                cfg_text_context = deepcopy(gen_context)
                 gen_context = self.update_context_text(input_term, gen_context)
-                cfg_text_context = self.update_context_text(input_term, cfg_text_context)
                 cfg_img_context = self.update_context_text(input_term, cfg_img_context)
+
             elif isinstance(input_term, Image.Image):
-
-
-
-                cfg_text_context =
-
+                input_term = self.vae_transform.resize_transform(pil_img2rgb(input_term))
+                gen_context = self.update_context_image(input_term, gen_context, vae=not understanding_output)
+                image_shapes = input_term.size[::-1]
+                cfg_text_context = deepcopy(gen_context)
+
             else:
                 raise ValueError(f"Unsupported input type: {type(input_term)}")
 
@@ -266,10 +267,9 @@ class InterleaveInferencer:
         full_thought_text = "".join(thought_text_parts)
         if full_thought_text: # Only update if thought was generated
             gen_context = self.update_context_text(full_thought_text, gen_context)
-            cfg_text_context = self.update_context_text(full_thought_text, cfg_text_context)
 
         img = self.gen_image(
-            image_shape=
+            image_shape=image_shapes,
             gen_context=gen_context,
             cfg_text_precontext=cfg_text_context,
             cfg_img_precontext=cfg_img_context,