hujiecpp committed
Commit e83787f · Parent: 0204715

init project

Files changed (1): app.py (+37 −31)
app.py CHANGED
@@ -39,8 +39,7 @@ import torchvision.transforms as tvf
 
 
 silent = False
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
-pe3r = Models(device)
+pe3r = Models('cpu')
 
 
 def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
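
Note: before this change, the device was resolved once at import time and all of `Models` was loaded onto it. On a ZeroGPU Space the main process has no CUDA at import (only calls wrapped in `spaces.GPU` see a GPU), so `Models('cpu')` appears to make the CPU load explicit and defers any GPU decision to the individual functions edited below.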
@@ -86,7 +85,7 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world,
     scene.export(file_obj=outfile)
     return outfile
 
-@spaces.GPU(duration=180)
+# @spaces.GPU(duration=180)
 def get_3D_model_from_scene(outdir, scene, min_conf_thr=3, as_pointcloud=False, mask_sky=False,
                             clean_depth=False, transparent_cams=False, cam_size=0.05):
     """
@@ -246,6 +245,8 @@ def slerp_multiple(vectors, t_values):
 
 @torch.no_grad
 def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
     sam_mask=[]
     img_area = original_size[0] * original_size[1]
 
@@ -299,6 +300,7 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
 
 @torch.no_grad
 def get_cog_feats(images):
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
     cog_seg_maps = []
     rev_cog_seg_maps = []
     inference_state = pe3r.sam2.init_state(images=images.sam2_images, video_height=images.sam2_video_size[0], video_width=images.sam2_video_size[1])
@@ -443,6 +445,8 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
     """
     if len(filelist) < 2:
         raise gradio.Error("Please input at least 2 images.")
+
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
     images = Images(filelist=filelist, device=device)
 
@@ -523,9 +527,11 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
 def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
                              mask_sky, clean_depth, transparent_cams, cam_size):
 
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
     texts = [text]
     inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
-    inputs = {key: value.to('cuda') for key, value in inputs.items()}
+    inputs = {key: value.to(device) for key, value in inputs.items()}
     with torch.no_grad():
         text_feats = pe3r.siglip.get_text_features(**inputs)
         text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
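
Note: the call-time device check `'cuda' if torch.cuda.is_available() else 'cpu'` now appears in four functions (get_mask_from_img_sam1, get_cog_feats, get_reconstructed_scene, get_3D_object_from_scene), and the tokenizer batch above follows it instead of being pinned to 'cuda', which failed on CPU-only hosts. A sketch of the same pattern factored into a helper; `resolve_device` and the stand-in batch are illustrative, not from the commit:

    import torch

    def resolve_device() -> str:
        # Decide per call, not at import: CUDA may exist only inside
        # GPU-decorated calls (ZeroGPU) or not at all.
        return 'cuda' if torch.cuda.is_available() else 'cpu'

    device = resolve_device()
    # Same dict comprehension as the edited line, on a stand-in batch.
    batch = {'input_ids': torch.zeros(1, 8, dtype=torch.long)}
    batch = {key: value.to(device) for key, value in batch.items()}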
@@ -559,7 +565,7 @@ def set_scenegraph_options(inputfiles, winsize, refid, scenegraph_type):
 
 with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
     recon_fun = functools.partial(get_reconstructed_scene, tmpdirname)
-    model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname)
+    # model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname)
     get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname)
 
     with gradio.Blocks(css=""".gradio-container {margin: 0 !important; min-width: 100%};""", title="PE3R Demo") as demo:
@@ -594,11 +600,11 @@ with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
             # adjust the camera size in the output pointcloud
             cam_size = gradio.Slider(label="cam_size", value=0.05, minimum=0.001, maximum=0.1, step=0.001, visible=False)
         with gradio.Row():
-            as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud")
+            as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud", visible=False)
             # two post process implemented
             mask_sky = gradio.Checkbox(value=False, label="Mask sky", visible=False)
             clean_depth = gradio.Checkbox(value=True, label="Clean-up depthmaps", visible=False)
-            transparent_cams = gradio.Checkbox(value=True, label="Transparent cameras")
+            transparent_cams = gradio.Checkbox(value=True, label="Transparent cameras", visible=False)
 
         with gradio.Row():
             text_input = gradio.Textbox(label="Query Text")
@@ -622,30 +628,30 @@ with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
                               mask_sky, clean_depth, transparent_cams, cam_size,
                               scenegraph_type, winsize, refid],
                       outputs=[scene, outmodel]) # , outgallery
-        min_conf_thr.release(fn=model_from_scene_fun,
-                             inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-                                     clean_depth, transparent_cams, cam_size],
-                             outputs=outmodel)
-        cam_size.change(fn=model_from_scene_fun,
-                        inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-                                clean_depth, transparent_cams, cam_size],
-                        outputs=outmodel)
-        as_pointcloud.change(fn=model_from_scene_fun,
-                             inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-                                     clean_depth, transparent_cams, cam_size],
-                             outputs=outmodel)
-        mask_sky.change(fn=model_from_scene_fun,
-                        inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-                                clean_depth, transparent_cams, cam_size],
-                        outputs=outmodel)
-        clean_depth.change(fn=model_from_scene_fun,
-                           inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-                                   clean_depth, transparent_cams, cam_size],
-                           outputs=outmodel)
-        transparent_cams.change(model_from_scene_fun,
-                                inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-                                        clean_depth, transparent_cams, cam_size],
-                                outputs=outmodel)
+        # min_conf_thr.release(fn=model_from_scene_fun,
+        #                      inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+        #                              clean_depth, transparent_cams, cam_size],
+        #                      outputs=outmodel)
+        # cam_size.change(fn=model_from_scene_fun,
+        #                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+        #                         clean_depth, transparent_cams, cam_size],
+        #                 outputs=outmodel)
+        # as_pointcloud.change(fn=model_from_scene_fun,
+        #                      inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+        #                              clean_depth, transparent_cams, cam_size],
+        #                      outputs=outmodel)
+        # mask_sky.change(fn=model_from_scene_fun,
+        #                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+        #                         clean_depth, transparent_cams, cam_size],
+        #                 outputs=outmodel)
+        # clean_depth.change(fn=model_from_scene_fun,
+        #                    inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+        #                            clean_depth, transparent_cams, cam_size],
+        #                    outputs=outmodel)
+        # transparent_cams.change(model_from_scene_fun,
+        #                         inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+        #                                 clean_depth, transparent_cams, cam_size],
+        #                         outputs=outmodel)
         find_btn.click(fn=get_3D_object_from_scene_fun,
                        inputs=[text_input, threshold, scene, min_conf_thr, as_pointcloud, mask_sky,
                                clean_depth, transparent_cams, cam_size],
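
Note: all six disabled handlers call `model_from_scene_fun`, whose definition is itself commented out in the hunk at @@ -559,7 +565,7 above; left active they would raise a NameError while the Blocks UI is built, so disabling them together is consistent. Only the reconstruction button and `find_btn` remain wired.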
 